def deleteEarlyModel(self): collection = getConnection('mongo')[self.modelColName] try: buildids = [] header = self.HEADERS_JSON for item in collection.find().sort("time", ASCENDING): buildids.append(item) # print(buildids) if len(buildids) > 0: buildurl = self.endpoint + '/models/%s/builds/%s' % (self.modelid, buildids[0]["buildId"]) resp = requests.delete(buildurl, headers=header) print("delete status = ", resp.status_code) # result = json.loads(resp.text) if resp.status_code == 200 or resp.status_code == 204: collection.delete_one(buildids[0]) except: print_exc()
def generateUsageFile(self, size, users = [], filename = 'usage.txt'): collection = getConnection('mongo')[self.colName] # temp = tempfile.TemporaryFile() try: items = [] for news in collection.find({}): items.append(news) with open(filename, 'wb') as file: for i in range(0, size): # TODO: look up usage file format file.write((','.join([ users[random.randint(0, len(users)-1)].replace('@','-').replace('.','_'), # userid str(items[random.randint(0, len(items)-1)]["_id"]), # itemid datetime.now().strftime(self.timeFormat), "Purchase" ])+"\n").encode()) except: print_exc() finally: # temp.close() pass
def buildRelated(self): collection = getConnection('mongo')[self.colName] model = gensim.models.Word2Vec.load(self.word2vecModel) try: for news in collection.find({ "related_words": { "$exists": 0 } }, projection={"keywords":1, "title": 1, "_id": 1}): relatedWords = set() for keyword in news["keywords"]: try: relatedWords |= set(map(lambda t: t[0], model.most_similar(keyword))) except KeyError: print("KeyError:", keyword) continue print(news["title"]) collection.update_one({"_id": news["_id"]}, {"$set": { "related_words": list(relatedWords) }}) except: print_exc() finally: model = None gc.collect()
def triggerBuild(self): collection = getConnection('mongo')[self.modelColName] header = self.HEADERS_JSON buildurl = self.endpoint + '/models/%s/builds?' % (self.modelid) body = json.dumps({ "description": "Simple recomendations build", "buildType": "recommendation", "buildParameters": { "recommendation": { "numberOfModelIterations": 40, "numberOfModelDimensions": 20, "itemCutOffLowerBound": 1, "itemCutOffUpperBound": 10, "userCutOffLowerBound": 0, "userCutOffUpperBound": 0, "enableModelingInsights": False, "useFeaturesInModel": True, "modelingFeatureList": "tag", "allowColdItemPlacement": True, "enableFeatureCorrelation": True, "reasoningFeatureList": "tag", "enableU2I": True } } }) try: resp = requests.post(buildurl, body, headers=header) result = json.loads(resp.text) print("url = ", buildurl) print(result) if resp.status_code == 202: # success collection.insert({ "buildId": result["buildId"], "time": datetime.now().strftime(self.timeFormat), "modelId": self.modelid, "token": self.token }) except: print_exc()
def updateCatalog(self): catalogurl = self.endpoint + '/models/%s/catalog' % self.modelid header = self.HEADERS_STREAM collection = getConnection('mongo')[self.colName] temp = tempfile.TemporaryFile() try: for news in collection.find({ "uploaded": False }): tags = [] itemName = "||".join([news["genre"], news["title"], news["imgurls"][0] if len(news["imgurls"]) > 0 else ""]) for tag in news["keywords"]: tags.append("tag="+tag) temp.write((','.join([str(news["_id"]), itemName, news["genre"], ""] + tags)+"\n").encode()) temp.seek(0) resp = requests.patch(catalogurl, data=temp, headers=header) result = json.loads(resp.text) print("result = " + str(result)) if resp.status_code == 200: collection.update_many({ "uploaded": False }, { "$set": { "uploaded": True } }, upsert=False) except: print_exc() finally: temp.close()