def RankImages(self, images):
    """Rank image URLs by the number of detected faces; SVG images are skipped."""
    ranks = defaultdict(int)
    try:
        # Load the Haar cascade once instead of rebuilding it for every image.
        face_cascade = cv2.CascadeClassifier(
            os.path.abspath(
                os.path.join(Config.ENV["OPEN_CV_HOME"], "haarcascades",
                             "haarcascade_frontalface_default.xml")))
        for image in images:
            if image.find("svg") < 0:
                try:
                    img = self.GetImageFromURL(image)
                    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
                    ranks[image] = len(faces)
                    print("face count : ", ranks[image])
                except Exception as e:
                    print("Exception: ", e)
                    Logger.Log("Feed chef:Rank Images:image processing error", str(e))
                    ranks[image] = -1
                    continue
        # Return the URLs ordered by face count, highest first.
        return [x[0] for x in sorted(ranks.items(), key=lambda x: x[1], reverse=True)]
    except Exception as e:
        print("Exception: ", e)
        Logger.Log("FeedChef Error:Rank Images:", str(e))
        return images
def DeleteFile(absPath):
    """Delete a file from disk; returns True on success."""
    try:
        os.remove(absPath)
        return True
    except Exception as e:
        Logger.Log("Exception", e)
        return False
def GetFeed():
    try:
        lastId = request.json['lastId']
        # Default the page size when the client does not send one.
        if request.json is not None and 'count' in request.json:
            count = request.json['count']
        else:
            count = 10
        data = WikiScrapper.GetFeed(lastId, count)
        return jsonify({'success': True, 'data': data})
    except Exception as e:
        print(e, "GetFeed")
        Logger.Log("Error", str(e))
        # Fall back to a placeholder item so the client always receives a valid shape.
        return jsonify({
            'success': False,
            'data': [{
                "topic": "",
                "title": "LoremIpsum",
                "images": [""],
                "content": "No content",
                "summary": "No page retrieved"
            }]
        })
def GetNext(self, lastId, count=10):
    """Fetch the next batch of unprocessed feed records, paging by _id."""
    try:
        if lastId == 0:
            records = list(
                self.collection.find({
                    "isReady": {"$exists": False}
                }).limit(count))
        else:
            records = list(
                self.collection.find({
                    "$and": [{"_id": {"$gt": lastId}},
                             {"isReady": {"$exists": False}}]
                }).limit(count))
        print(records)
        if len(records) > 0:
            # Remember where this batch ended so the next call can resume from it.
            self._lastId = records[-1]["_id"]
        else:
            self._lastId = -1
            self.ProcessedAllRecords = True
        return records
    except Exception as e:
        print(e)
        Logger.Log("FeedChef : Get Next", str(e))
        return []
def AddPageToDB(page):
    try:
        # insert_one replaces the deprecated pymongo insert().
        mongo.db.wikipediaFeed.insert_one(page)
    except Exception as e:
        print(e)
        Logger.Log("DBAccess:Error - WikipediaFeed.AddPageToDB",
                   "Failed to insert page")
def GetFeed(lastId, count=2):
    try:
        feed = WikiFeed.GetPages(lastId)
        # The oldest item's timestamp becomes the cursor for the next page.
        return {"feed": feed, "lastId": feed[-1]["createddatetime"]}
    except Exception as e:
        print(e)
        Logger.Log("Error WikiScrapper.GetFeed", str(e))
        return [{
            "topic": "",
            "title": "LoremIpsum",
            "images": [""],
            "content": "No content",
            "summary": "No page retrieved"
        }]
def GetPage(topic):
    try:
        page = wikipedia.page(topic)
        # Escape double quotes so the text can be embedded safely in JSON strings.
        return {
            "topic": topic,
            "title": page.title.replace("\"", "\\\""),
            "images": WikiScrapper.ScrubImageArray(page.images),
            "content": page.content.replace("\"", "\\\""),
            "summary": page.summary.replace("\"", "\\\""),
            "createddatetime": str(datetime.datetime.now())
        }
    except Exception as e:
        print(e)
        Logger.Log("Error", str(e))
        return None
def GetFileMetaData(id):
    try:
        data = mongo.db.uploadedFiles.find_one({"_id": ObjectId(id)})
        return data
    except Exception as e:
        print(e)
        Logger.Log("Exception", e)
        return None
def DeleteFileMetaData(id):
    try:
        result = mongo.db.uploadedFiles.delete_one({"_id": ObjectId(id)})
        return result.deleted_count > 0
    except Exception as e:
        print(e)
        Logger.Log("Exception DeleteFileMetaData", e)
        return False
def run(self):
    print("Running....")
    try:
        db = RawDBAccess.GetMongoDB()
        table = db.wikipediaFeed
        print("Starting ")
        Logger.Log("FeedJob", "Feed job starting")
        Logger.Log("FeedJob", "Feed job preparing feed")
        print("PrepareFeed....")
        WikiScrapper.PrepareFeed(table)
        Logger.Log("FeedJob", "Feed job processing feed")
        print("Process Feed....")
        chef = FeedChef()
        chef.ProcessRecords()
        Logger.Log("FeedJob", "Feed job processing feed done")
    except Exception as e:
        print(e)
        Logger.Log("Exception :FeedCreate job", str(e))
def GetAllPlotLyGraphs():
    try:
        return [{
            "name": x["name"],
            "url": x["url"],
            "_id": str(x["_id"])
        } for x in mongo.db.plotly.find({})]
    except Exception as e:
        Logger.Log("DBAccess:Error", "Failed to Get PlotLy")
        return []
def PrepareFeed(table):
    try:
        topics = WikiScrapper.GetTopics()
        for topic in topics:
            try:
                print("Getting page for", topic)
                page = WikiScrapper.GetPage(topic)
                print("Saving page to Db", topic)
                if page is not None:
                    AddPageToDB(table, page)
            except Exception as e:
                print(e)
                Logger.Log("Error WikiScrapper.PrepareFeed Inner Loop", str(e))
                continue
        chef = FeedChef()
        chef.ProcessRecords()
    except Exception as e:
        print(e)
        Logger.Log("Error WikiScrapper.PrepareFeed", str(e))
def GetImageFromURL(self, url):
    """Download an image and decode it into an OpenCV matrix."""
    try:
        # urllib.request.urlopen is the Python 3 equivalent of urllib.urlopen.
        url_response = urllib.request.urlopen(url)
        img_array = np.array(bytearray(url_response.read()), dtype=np.uint8)
        img = cv2.imdecode(img_array, -1)
        # Compare with `is not None`; `!=` on a numpy array is element-wise.
        print("got image", img is not None)
        return img
    except Exception as e:
        print("Exception : ", e)
        Logger.Log("FeedChef:GetImageFromURL", str(e))
        return None
def GetAllUploadedFilesForUser(username):
    try:
        files = mongo.db.uploadedFiles.find({"creator": username})
        results = [{
            "name": f["userGivenFileName"],
            "tags": f["tags"],
            "id": str(f["_id"])
        } for f in files]
        return results
    except Exception as e:
        Logger.Log("Exception", e)
        print(e)
        return []
def ProcessRecords(self, refresh=False, func=None):
    try:
        self.db = RawDBAccess.GetMongoDB()
        self.collection = self.db[self._wikipediaFeedCollection]
        if refresh:
            # Clear the isReady flag so every record is reprocessed.
            self.collection.update_many({}, {"$unset": {"isReady": ""}})
        while not self.ProcessedAllRecords:
            for record in self.GetNext(self._lastId, 10):
                print("Got next 10 records")
                if func is not None:
                    # A caller-supplied processor takes precedence over the default pipeline.
                    try:
                        func(record)
                    except Exception as e:
                        print(e)
                        Logger.Log("Feed chef custom function error", str(e))
                        continue
                else:
                    try:
                        print("Beginning : Keyword Extract")
                        keywords, keypoints = Helpers.ExtractKeywords(record["content"], 10)
                        print("Done : Keyword Extract")
                        record["keywords"] = keywords
                        record["keypoints"] = keypoints
                        print("Saved : Keyword Extract")
                    except Exception as e:
                        print(e)
                        Logger.Log("Feed Chef :Error processing record", str(e))
                record["isReady"] = True
                # replace_one supersedes the deprecated collection.save().
                self.collection.replace_one({"_id": record["_id"]}, record)
    except Exception as e:
        print(e)
        Logger.Log("Feed Chef :Error accessing db", str(e))
def GetPages(lastId):
    try:
        if lastId == 0:
            pages = mongo.db.wikipediaFeed.find().sort([
                ("createddatetime", pymongo.DESCENDING)
            ]).limit(WikipediaFeed.PageSize)
        else:
            # Page backwards in time from the supplied cursor timestamp.
            pages = mongo.db.wikipediaFeed.find({
                "createddatetime": {"$lt": lastId}
            }).sort([("createddatetime", pymongo.DESCENDING)
                     ]).limit(WikipediaFeed.PageSize)
        return [GetDataItemWithId(p) for p in pages]
    except Exception as e:
        print(e)
        Logger.Log("DBAccess:Error - WikipediaFeed.GetPages", "Failed to get pages")
        return []
def GetPage():
    topic = ""
    try:
        topic = request.json['topic']
        data = WikiScrapper.GetPage(topic)
        return jsonify({'success': True, 'data': data})
    except Exception as e:
        print(e)
        Logger.Log("Error", str(e))
        # topic stays "" if it could not be read from the request body.
        return jsonify({
            'success': False,
            'data': {
                "topic": topic,
                "title": "LoremIpsum",
                "images": [""],
                "content": "No content",
                "summary": "No page retrieved"
            }
        })
def SaveFileMetaData(userGivenFileName, systemGeneratedFileName, creator, tags,
                     absPath, fileType, convertToHtml):
    try:
        # insert_one replaces the deprecated insert(); return the new document's id.
        result = mongo.db.uploadedFiles.insert_one({
            "userGivenFileName": userGivenFileName,
            "systemGeneratedFileName": systemGeneratedFileName,
            "creator": creator,
            "tags": tags,
            "absPath": absPath,
            "fileType": fileType,
            "convertToHtml": convertToHtml
        })
        return result.inserted_id
    except Exception as e:
        Logger.Log("Exception", e)
        return False
def AddPageToDB(table, page):
    try:
        table.insert_one(page)
    except Exception as e:
        print(e)
        Logger.Log("Error WikiScrapper.AddPageToDB", str(e))