Example #1
    def RankImages(self, images):
        # Rank candidate image URLs by the number of faces OpenCV detects in each one.
        ranks = defaultdict(int)
        try:
            for image in images:
                # Skip SVG images, which OpenCV cannot decode.
                if image.find("svg") < 0:
                    try:
                        face_cascade = cv2.CascadeClassifier(
                            os.path.abspath(
                                os.path.join(
                                    Config.ENV["OPEN_CV_HOME"],
                                    'haarcascades',
                                    'haarcascade_frontalface_default.xml')))
                        img = self.GetImageFromURL(image)
                        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
                        ranks[image] = len(faces)
                        print("face count : ", ranks[image])
                    except Exception as e:
                        print("Exception: ", e)
                        Logger.Log(
                            "Feed chef:Rank Images:image processing error",
                            str(e))
                        ranks[image] = -1
                        continue

            # Sort the URLs by face count, highest first.
            rankedImages = [
                x[0] for x in sorted(
                    ranks.items(), key=lambda x: x[1], reverse=True)
            ]
            return rankedImages
        except Exception as e:
            print("Exception: ", e)
            Logger.Log("FeedChef Error:Rank Images:", str(e))
            return images
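For reference, here is a minimal standalone sketch of the same face-count ranking idea, assuming the opencv-python package (which ships the Haar cascade files under cv2.data.haarcascades) and local image files instead of URLs; the function name and file paths are hypothetical:

import cv2

def rank_local_images(paths):
    # Rank local image files by the number of detected faces.
    cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
    ranks = {}
    for path in paths:
        img = cv2.imread(path)
        if img is None:  # unreadable or missing file: rank it last
            ranks[path] = -1
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = cascade.detectMultiScale(gray, 1.3, 5)
        ranks[path] = len(faces)
    return [p for p, _ in sorted(ranks.items(), key=lambda x: x[1], reverse=True)]

# rank_local_images(["team.jpg", "logo.png"])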
Example #2
def DeleteFile(absPath):
    # Delete the file at absPath; True on success, False if removal failed.
    try:
        os.remove(absPath)
        return True
    except Exception as e:
        Logger.Log("Exception", e)
        return False
Example #3
def GetFeed():
    try:
        # Default count to 10 when the request body does not supply one.
        lastId = request.json['lastId']
        count = request.json.get('count', 10)
        data = WikiScrapper.GetFeed(lastId, count)
        success = True
        return jsonify({'success': success, 'data': data})
    except Exception as e:
        success = False
        print(e, "GetFeed")
        Logger.Log("Error", str(e))
        # Fall back to a placeholder payload so the client always gets a response.
        return jsonify({
            'success': success,
            'data': [{
                "topic": "",
                "title": "LoremIpsem",
                "images": [""],
                "content": "No content",
                "summary": "No page retrieved"
            }]
        })
Example #4
    def GetNext(self, lastId, count=10):
        # Fetch the next batch of unprocessed records (those without an "isReady" flag).
        try:
            if lastId == 0:
                records = list(
                    self.collection.find(
                        {"isReady": {"$exists": False}}).limit(count))
            else:
                records = list(
                    self.collection.find({
                        "$and": [
                            {"_id": {"$gt": lastId}},
                            {"isReady": {"$exists": False}}
                        ]
                    }).limit(count))

            print(records)
            if len(records) > 1:
                self._lastId = records[-1]["_id"]
            else:
                # One record or fewer left: remember that we have reached the end.
                self._lastId = -1
                self.ProcessedAllRecords = True

            return records
        except Exception as e:
            print(e)
            Logger.Log("FeedChef : Get Next", str(e))
        return []
Example #5
def AddPageToDB(page):
    try:
        # insert_one is the current pymongo API (insert is deprecated).
        mongo.db.wikipediaFeed.insert_one(page)
    except Exception as e:
        print(e)
        Logger.Log("DBAccess:Error - WikipediaFeed.AddPageToDB",
                   "Failed to insert page")
Example #6
def GetFeed(lastId, count=2):
    try:
        feed = WikiFeed.GetPages(lastId)
    except Exception as e:
        print(e)
        Logger.Log("Error WikiScrapper.GetFeed", str(e))
        return [{"topic": "", "title": "LoremIpsem", "images": [""],
                 "content": "No content", "summary": "No page retrieved"}]
    # Reuse the oldest record's timestamp as the paging cursor for the next call.
    return {"feed": feed,
            "lastId": feed[-1]["createddatetime"] if feed else lastId}
Example #7
def GetPage(topic):
    try:
        page = wikipedia.page(topic)
        # Escape embedded double quotes so the text can be stored safely.
        return {"topic": topic,
                "title": page.title.replace("\"", "\\\""),
                "images": WikiScrapper.ScrubImageArray(page.images),
                "content": page.content.replace("\"", "\\\""),
                "summary": page.summary.replace("\"", "\\\""),
                "createddatetime": str(datetime.datetime.now())}
    except Exception as e:
        print(e)
        Logger.Log("Error", str(e))
        return None
Example #8
def GetFileMetaData(id):
    # Look up the metadata document for an uploaded file by its ObjectId string.
    try:
        data = mongo.db.uploadedFiles.find_one({"_id": ObjectId(id)})
        return data
    except Exception as e:
        print(e)
        Logger.Log("Exception", e)
        return None
Example #9
def DeleteFileMetaData(id):
    # Remove the metadata document for an uploaded file; True if something was deleted.
    try:
        success = mongo.db.uploadedFiles.delete_one({"_id": ObjectId(id)})
        return success.deleted_count > 0
    except Exception as e:
        print(e)
        Logger.Log("Exception DeleteFileMetaData", e)
        return False
Example #10
def run(self):
    print("Running....")
    try:
        db = RawDBAccess.GetMongoDB()
        table = db.wikipediaFeed
        print("Starting")
        Logger.Log("FeedJob", "Feed job starting")
        Logger.Log("FeedJob", "Feed job preparing feed")
        print("PrepareFeed....")
        WikiScrapper.PrepareFeed(table)
        Logger.Log("FeedJob", "Feed job processing feed")
        print("Process Feed....")
        chef = FeedChef()
        chef.ProcessRecords()
        Logger.Log("FeedJob", "Feed job processing feed done")
    except Exception as e:
        print(e)
        Logger.Log("Exception :FeedCreate job", str(e))
Example #11
def GetAllPlotLyGraphs():
    # Return name, url and id for every stored Plotly graph.
    try:
        return [{
            "name": x["name"],
            "url": x["url"],
            "_id": str(x["_id"])
        } for x in mongo.db.plotly.find({})]
    except Exception as e:
        Logger.Log("DBAccess:Error", "Failed to Get PlotLy: " + str(e))
        return []
Example #12
def PrepareFeed(table):
    try:
        topics = WikiScrapper.GetTopics()
        for topic in topics:
            try:
                print("Getting page for", topic)
                page = WikiScrapper.GetPage(topic)
                print("Saving page to Db", topic)
                if page is not None:
                    AddPageToDB(table, page)
            except Exception as e:
                print(e)
                Logger.Log("Error WikiScrapper.PrepareFeed Inner Loop", str(e))
                continue
        # Once every topic has been fetched, run the chef to enrich the records.
        chef = FeedChef()
        chef.ProcessRecords()
    except Exception as e:
        print(e)
        Logger.Log("Error WikiScrapper.PrepareFeed", str(e))
Example #13
def GetImageFromURL(self, url):
    try:
        # Download the raw bytes and let OpenCV decode them into an image array.
        url_response = urllib.request.urlopen(url)
        img_array = np.array(bytearray(url_response.read()), dtype=np.uint8)
        img = cv2.imdecode(img_array, -1)
        print("got image", img is not None)
        return img
    except Exception as e:
        print("Exception : ", e)
        Logger.Log("FeedChef:GetImageFromURL", str(e))
        return None
Example #14
def GetAllUploadedFilesForUser(username):
    # List name, tags and id of every file uploaded by the given user.
    try:
        files = mongo.db.uploadedFiles.find({"creator": username})
        results = [{
            "name": f["userGivenFileName"],
            "tags": f["tags"],
            "id": str(f["_id"])
        } for f in files]
        return results
    except Exception as e:
        Logger.Log("Exception", e)
        print(e)
        return []
Example #15
    def ProcessRecords(self, refresh=False, func=None):
        try:
            self.db = RawDBAccess.GetMongoDB()
            self.collection = self.db[self._wikipediaFeedCollection]
            if refresh:
                # Clear the "isReady" flag so every record is processed again.
                self.collection.update_many({}, {"$unset": {"isReady": ""}})

            while not self.ProcessedAllRecords:
                for record in self.GetNext(self._lastId, 10):
                    print("Got next 10 records")
                    if func is not None:
                        # Caller supplied a custom per-record processor.
                        try:
                            func(record)
                        except Exception as e:
                            print(e)
                            Logger.Log("Feed chef custom function error",
                                       str(e))
                            continue
                    else:
                        # Default processing: extract keywords and keypoints.
                        try:
                            print("Beginning :  Keyword Extract")
                            keywords, keypoints = Helpers.ExtractKeywords(
                                record["content"], 10)
                            print("Done : Keyword Extract")
                            record["keywords"] = keywords
                            record["keypoints"] = keypoints
                            print("Saved : Keyword Extract")
                        except Exception as e:
                            print(e)
                            Logger.Log("Feed Chef :Error processing record",
                                       str(e))
                    record["isReady"] = True
                    # replace_one with upsert mirrors the old collection.save behaviour.
                    self.collection.replace_one({"_id": record["_id"]},
                                                record, upsert=True)

        except Exception as e:
            print(e)
            Logger.Log("Feed Chef :Error accessing db", str(e))
Example #16
def GetPages(lastId):
    try:
        if lastId == 0:
            # First page: newest records first.
            query = {}
        else:
            # Subsequent pages: only records older than the supplied cursor.
            query = {"createddatetime": {"$lt": lastId}}
        pages = mongo.db.wikipediaFeed.find(query).sort(
            [("createddatetime", pymongo.DESCENDING)]).limit(
                WikipediaFeed.PageSize)
        return [GetDataItemWithId(p) for p in pages]
    except Exception as e:
        print(e)
        Logger.Log("DBAccess:Error - WikipediaFeed.GetPages",
                   "Failed to get pages")
        return []
Example #17
def GetPage():
    topic = ""
    try:
        topic = request.json['topic']
        data = WikiScrapper.GetPage(topic)
        success = True
        return jsonify({'success': success, 'data': data})
    except Exception as e:
        success = False
        print(e)
        Logger.Log("Error", str(e))
        # Fall back to a placeholder payload so the client always gets a response.
        return jsonify({
            'success': success,
            'data': {
                "topic": topic,
                "title": "LoremIpsem",
                "images": [""],
                "content": "No content",
                "summary": "No page retrieved"
            }
        })
Example #18
def SaveFileMetaData(userGivenFileName, systemGeneratedFileName, creator,
                     tags, absPath, fileType, convertToHtml):
    try:
        # insert_one is the current pymongo API (insert is deprecated).
        success = mongo.db.uploadedFiles.insert_one({
            "userGivenFileName": userGivenFileName,
            "systemGeneratedFileName": systemGeneratedFileName,
            "creator": creator,
            "tags": tags,
            "absPath": absPath,
            "fileType": fileType,
            "convertToHtml": convertToHtml
        })
        return success
    except Exception as e:
        Logger.Log("Exception", e)
        return False
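Taken together with GetFileMetaData and DeleteFileMetaData above, a hypothetical round trip through these helpers might look like this (the file names, user and tags are made-up values; assumes SaveFileMetaData returns the pymongo insert result as above):

result = SaveFileMetaData("report.pdf", "a1b2c3.pdf", "alice",
                          ["finance"], "/data/uploads/a1b2c3.pdf",
                          "pdf", False)
file_id = str(result.inserted_id)
print(GetFileMetaData(file_id))      # fetch the document that was just stored
print(DeleteFileMetaData(file_id))   # True if a record was removed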
Example #19
def AddPageToDB(table, page):
    try:
        table.insert_one(page)
    except Exception as e:
        print(e)
        Logger.Log("Error WikiScrapper.AddPageToDB", str(e))