Example #1
def read_from_db(data_file):
    '''Reads the data from the database and exports it to the specified JSON file'''
    global LIMIT_COUNT
    count = 1
    json_docs = []

    db = MongoDB()
    cursor = db.query({})

    if cursor:
        for doc in cursor:
            if LIMIT_COUNT != 0 and count > LIMIT_COUNT:
                print("\nLimited IDs Read\n")
                break
            pprint.pprint(doc)
            json_doc = json.dumps(doc, default=json_util.default)
            json_docs.append(json_doc)
            count += 1

    db.close()

    print(count - 1, "Records Found.\n")
    json_data_file = [json.loads(j_doc, object_hook=json_util.object_hook) for j_doc in json_docs]

    write_path = os.path.join(*[DIR, "data", data_file])
    with open(write_path, 'w') as outfile:
        json.dump(json_data_file, outfile, default=date_handler, indent=4, sort_keys=True)
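Examples #1 and #3 rely on a small custom MongoDB wrapper and a date_handler serializer that are not shown anywhere on this page. A minimal sketch of what they might look like, with names and signatures inferred from the calls above rather than taken from any real source:

import pymongo

class MongoDB:
    def __init__(self, uri='mongodb://localhost:27017/'):
        # hypothetical database and collection names
        self.client = pymongo.MongoClient(uri)
        self.collection = self.client['mydb']['stores']

    def query(self, filter_doc):
        return self.collection.find(filter_doc)

    def insert(self, filter_doc, update_doc):
        # upsert semantics matching Example #3's usage
        result = self.collection.update_one(filter_doc, update_doc, upsert=True)
        return result.modified_count

    def close(self):
        self.client.close()

def date_handler(obj):
    # serialize datetime objects for json.dump
    return obj.isoformat() if hasattr(obj, 'isoformat') else obj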
Example #2
    def search_images_for_city(city_id: int,
                               limit: int = 1000,
                               batch_size: int = 50,
                               common_limit: bool = False,
                               only_new_posts: bool = False):
        collection = MongoDB.db()['cities']
        place_collection = MongoDB.db()['places']
        query = {}
        if city_id is not None:
            query['city_id'] = city_id
        cities = []
        for city in collection.find(query):
            buffer_city = {
                'name': city['name'],
                '_id': city['_id'],
                'places': []
            }
            for place in place_collection.find({'city_id': city['_id']}):
                buffer_city['places'].append(place['uid'])
            cities.append(buffer_city)

        for city in cities:
            print('City `%s` has %d places' %
                  (city['name'],
                   place_collection.count({'city_id': city['_id']})))
            for place_id in city['places']:
                DataLoader.search_images_for_place(
                    place_id,
                    limit,
                    batch_size,
                    common_limit=common_limit,
                    only_new_posts=only_new_posts)
Example #3
def insert_to_db():
    '''Inserts scraped data into the database'''

    global LIMIT_COUNT
    count = 1
    idlist = read_json('import.json')

    db = MongoDB()
    print "Initiate Scraping for ", len(idlist), "records ...\n\n"

    for storeid in idlist:
        print "Scraping Data: " + str(count) + "\n"
        extn = scrap_data(storeid["_id"])
        extn['updated'] = datetime.datetime.utcnow()
        print extn
        print "Inserted", db.insert({"_id":storeid["_id"]},
                                    {"$set":extn, "$setOnInsert": {"created": datetime.datetime.utcnow()}}), "data.\n"

        if LIMIT_COUNT != 0 and count > LIMIT_COUNT:
            print "Update Limit Has Been Set \n"
            break
        count += 1

    print(count - 1, "records updated.\n")
    db.close()
Example #4
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds = HPFeeds()
        self.MAEC = MAEC(thug_version)
        self.MongoDB = MongoDB()
        self.shellcodes = set()
Example #5
    def __init__(self, update_rank_list):
        self.db = MongoDB(update_rank_list=update_rank_list)
        self.target_dict = {}
        # print('Update once when initialized & take a look at time')
        start_time = time.time()
        self.whole_data_bundle()
        self.period_seconds = int(time.time() - start_time) * 2
Example #6
def query():
    """
    given {start_time, end_time, grid_id [optional]}
    return -> [{grid_id, volume}, ...]
    """
    start_time = datetime.strptime(request.query.get('start_time'), '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime(request.query.get('end_time'), '%Y-%m-%dT%H:%M:%SZ')
    grid_id = request.query.get('grid_id')

    mg = MongoDB()
    mg.connect()

    print('querying grid volumes...')

#     if grid_id:
#         results = mg.group_by([{'$match': {'created_at': {'$gt': start_time, '$lt': end_time}, 'grid_id': grid_id}},
#                                {'$group': {'_id': '$grid_id', 'count': {'$sum': 1}}}
#                                ])
#     else:
#         results = mg.group_by([{'$match': {'created_at': {'$gt': start_time, '$lt': end_time}}},
#                                {'$group': {'_id': '$grid_id', 'count': {'$sum': 1}}}
#                                ])
#    group and count distinct user:
    results = mg.group_by([ {'$match': {'created_at': {'$gt': start_time, '$lt': end_time}}},
                            {'$group': {'_id':{ 'grid_id':'$grid_id', 'user_id':'$user_id' }, 'count': {'$sum': 1} } },
                            {'$group': {'_id': '$_id.grid_id', 'count': {'$sum': 1}}}
                          ])

    ret = []
    for result in results:
        ret.append({'grid_id': result['_id'], 'volume': result['count']})

    response.content_type = 'application/json'
    return json_dumps(ret, indent=2)
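mg.group_by above presumably wraps pymongo's aggregation framework; the distinct-user count per grid could be issued directly against a collection like this (a sketch assuming a pymongo tweets collection):

results = tweets_collection.aggregate([
    {'$match': {'created_at': {'$gt': start_time, '$lt': end_time}}},
    {'$group': {'_id': {'grid_id': '$grid_id', 'user_id': '$user_id'}}},
    {'$group': {'_id': '$_id.grid_id', 'count': {'$sum': 1}}},
])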
Example #7
    async def on_ready(self):
        """
        Implementing discord.Client on_ready() that is called when the bot is ready

        We do any additional post-initialization set-up here
        """

        print('Logged on as {0}!'.format(self.user))
        self.db = MongoDB()
Example #8
class DiscordClient(discord.Client):
    """
    Creates an instance of the Bot

    Attributes
    __________
    db: (MongoDB obj)
        Instance of the custom MongoDB class to fetch and update the database

    Functions
    __________
    async on_ready()
        Implementing discord.Client on_ready() that is called when the bot is ready
    async def on_reaction_add(reaction, user)
        Implementing discord.Client on_reaction_add() that is called when a reaction is added to a message
    async on_message(message)
        Implementing discord.Client on_message() that is called when a user messages
        in a server (discord.Guild)

    """
    db = None

    async def on_ready(self):
        """
        Implementing discord.Client on_ready() that is called when the bot is ready

        We do any additional post-initialization set-up here
        """

        print('Logged on as {0}!'.format(self.user))
        self.db = MongoDB()

    async def on_reaction_add(self, reaction, user):
        """
        Implementing discord.Client on_reaction_add() that is called when a reaction is added to a message

        We call the corresponding database method to add that reaction to the message in our database
        """

        self.db.addReactToMessageInDb(reaction)

    async def on_message(self, message):
        """
        Implementing discord.Client on_message() that is called when a user messages
        in a server (discord.Guild)

        This is where we add the new message into the database
        """

        if message.author == self.user:
            return

        if len(message.content) < 1:
            return

        self.db.addMessageToDb(message)
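A hypothetical way to run this client, assuming the discord.py 1.x API used here (2.x additionally requires an intents argument):

client = DiscordClient()
client.run('YOUR_BOT_TOKEN')  # placeholder token, not from the original source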
Example #9
class DBSaver:
    def __init__(self):
        self.connection = MongoDB('mongodb://localhost:27017/', "mydatabase")

    def SaveBeaconData(self, beaconDataDict):
        #Save into beacon collection
        for beaconData in beaconDataDict:
            self.connection.SaveNewEntry(str(beaconData['UID']), beaconData)
        #Save into master
        self.connection.SaveNewEnteries("allbeacondata", beaconDataDict)
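SaveNewEntry and SaveNewEnteries belong to the same custom wrapper; a plausible pymongo-backed sketch, keeping the method names exactly as the example spells them:

import pymongo

class MongoDB:
    def __init__(self, uri, db_name):
        self.db = pymongo.MongoClient(uri)[db_name]

    def SaveNewEntry(self, collection_name, document):
        self.db[collection_name].insert_one(document)

    def SaveNewEnteries(self, collection_name, documents):
        self.db[collection_name].insert_many(documents)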
Example #10
def all_grids():
    mg = MongoDB()
    mg.connect()
    griddb = GridDB()
    print('querying grid volumes...')
    results = mg.group_by([{'$match': {'created_at': {'$gt': datetime.strptime('2012-10-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ'),
                                                      '$lt': datetime.strptime('2012-11-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')}}}])   # print(results)
    griddb.add(results)

    ret = Grid.get_raw_pandas_ts(results, 'D')

    STL.seasonal_decomposition(ret)
Example #11
def load_tweets_to_grids():

    # mongodb
    mg = MongoDB()
    mg.connect()

    tweets = mg.find()

    grid_db = GridDB()
    grid_db.add(tweets)

    return grid_db
Example #12
    def __init__(self, update_rank_list):
        self.db = MongoDB(update_rank_list=update_rank_list)
        self.total_rank_list = []
        # self.mid_list = []
        # self.message_room_persentage_dict = {}
        # self.man_chart_dict = {}
        # self.man_status_dict = {}
        # self.radar_dict = {}
        self.huolonglive_tracker = {}
        self.NRS = no_ram_saver()
        # start socket process.. if this works
        python_ws_client(self.db)

        print('Update once when initialized & take a look at time')
Example #13
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds    = HPFeeds()
        self.MAEC       = MAEC(thug_version)
        self.MongoDB    = MongoDB()
        self.shellcodes = set()
Example #14
class Quick_update(object):
    def __init__(self, update_rank_list):
        self.db = MongoDB(update_rank_list=update_rank_list)
        self.target_dict = {}
        # print('Update once when initialized & take a look at time')
        start_time = time.time()
        self.whole_data_bundle()
        self.period_seconds = int(time.time() - start_time) * 2
        # print("--- %s seconds ---" % (self.period_seconds))

    def begin_update_data_periodically(self):
        timerThread = threading.Thread(target=self.timer_func)
        timerThread.start()

    def timer_func(self):
        next_call = time.time()
        while True:
            # print(f"update data at: {datetime.datetime.now()}")
            start_time = time.time()
            self.whole_data_bundle()
            self.period_seconds = int(time.time() - start_time) * 2
            # print("--- %s seconds ---" % (self.period_seconds))
            next_call = next_call + self.period_seconds
            time.sleep(max(next_call - time.time(), 1))

    def whole_data_bundle(self):
        self.target_dict = self.db.get_updated_server_info()
Example #15
def single_grid(grid_id):
    mg = MongoDB()
    mg.connect()
    start_time = datetime.strptime('2012-10-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime('2012-11-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')

    print('querying grid volumes...')
    # results = mg.group_by([{'$match': {'created_at': {'$gt': start_time, '$lt': end_time}, 'grid_id': grid_id}},
    #                       {'$group': {'_id': '$grid_id', 'count': {'$sum': 1}}}])
    results = mg.group_by([{'$match': {'created_at': {'$gt': datetime.strptime('2012-10-15T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'),
                                                       '$lt': datetime.strptime('2012-11-15T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')},
                                       'grid_id': grid_id}}])   # print(results)
    print(results)
    ret = Grid.get_raw_pandas_ts(results, 'H')
    # print('------------')
    # print(ret)
    print(STL.seasonal_decomposition(ret))
Example #16
    def download_predicted_images(dir,
                                  limit: int = None,
                                  batch_size=10,
                                  predicted_class='tpo',
                                  min_accuracy=None,
                                  max_accuracy=None,
                                  source='thumbnail_url'):
        if not os.path.isdir(dir):
            os.makedirs(dir)
        collection = MongoDB.db()['images']
        query = {'class': predicted_class, 'accuracy': {}}
        if min_accuracy is not None:
            query['accuracy']['$gte'] = min_accuracy
        if max_accuracy is not None:
            query['accuracy']['$lte'] = max_accuracy
        # if query['accuracy']:
        #     del query['accuracy']

        # dir_files = [f for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]
        counter = 0
        stop = False
        last_id = None
        # if len(dir_files) > 0:
        #     last_id = sorted(dir_files)[-1]
        # else:
        #     last_id = None
        total_count = collection.find(query).count()
        if limit is not None:
            total_count = limit
        while (limit is None or counter < limit) and \
                collection.find(query).sort([('uid', 1)]).count() > 0:
            pool = multiprocessing.pool.ThreadPool()
            results = []
            if last_id is not None:
                query['uid'] = {'$gt': last_id}
            for image in collection.find(query).sort([('uid', 1)]).limit(batch_size):
                if limit is not None and counter >= limit:
                    break
                elif limit is None:
                    counter += 1
                if not os.path.exists(os.path.join(dir, image['uid'] + '.jpg')):
                    results.append(
                        pool.apply_async(
                            DataLoader.download_to_file,
                            (image[source],
                             os.path.join(dir, image['uid'] + '.jpg'))))
                    if limit is not None:
                        counter += 1
                last_id = image['uid']
            for res in results:
                data = res.get()
            pool.close()
            pool.join()
            print('Downloaded %d of %d images' % (counter, total_count))
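Cursor.count() as used above was deprecated in PyMongo 3.7 and removed in 4.0; on current drivers the equivalent calls are:

total_count = collection.count_documents(query)            # replaces find(query).count()
has_more = collection.count_documents(query, limit=1) > 0  # cheap emptiness check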
Example #17
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds        = HPFeeds()
        self.MAEC           = MAEC(thug_version)
        self.MongoDB        = MongoDB()
        self.JSONLog        = JSONLog(thug_version)
        self.baseDir        = None
        self.shellcodes     = set()
        self.shellcode_urls = set()
Example #18
    def load_places_for_city(self,
                             city_id: int = None,
                             limit: int = None,
                             use_last_cursor: bool = False):
        collection = MongoDB.db()['cities']
        placesCollection = MongoDB.db()['places']
        query = {}
        if city_id is not None:
            query['city_id'] = city_id
        for city in collection.find(query):
            cursor = None
            if use_last_cursor and 'last_cursor' in city:
                if city['last_cursor'] is False:
                    continue
                cursor = city['last_cursor']
            print('Start searching places for %s' % city['name'])
            counter = 0
            while limit is None or counter < limit:
                places, cursor = self.search_places_for_location(
                    city['uid'], 10, cursor)
                for place_id in places:
                    if not placesCollection.find_one({'uid': place_id}):
                        place = places[place_id]
                        place['city_id'] = city['_id']
                        place['location'] = {
                            'type': 'Point',
                            'coordinates': [place['longitude'], place['latitude']]
                        }
                        placesCollection.insert_one(place)
                        print("Place `%s` was added to `%s`" %
                              (place['name'], city['city_local_name']))
                        counter += 1
                MongoDB.db()['cities'].update_one(
                    {'_id': city['_id']},
                    {'$set': {'last_cursor': cursor}})
                print('Found %d places of %d' %
                      (counter, limit if limit is not None else -1))
                if cursor is False:
                    break
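The places are stored with a GeoJSON Point in the location field; querying them geospatially (e.g. with $near) requires a 2dsphere index, which the snippet assumes already exists. A one-time setup sketch:

MongoDB.db()['places'].create_index([('location', '2dsphere')])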
Example #19
class ThugLogging(BaseLogging):
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds    = HPFeeds()
        self.MAEC       = MAEC(thug_version)
        self.MongoDB    = MongoDB()
        self.shellcodes = set()

    def set_url(self, url):
        self.MAEC.set_url(url)
        self.MongoDB.set_url(url)

    def add_behavior_warn(self, description = None, cve = None, method = "Dynamic Analysis"):
        self.MAEC.add_behavior_warn(description, cve, method)

    def add_code_snippet(self, snippet, language, relationship, method = "Dynamic Analysis"):
        self.MAEC.add_code_snippet(snippet, language, relationship, method)

    def log_file(self, data, url):
        sample = self.build_sample(data, url)
        if sample is None:
            return
        
        self.HPFeeds.log_file(sample)
        self.MAEC.log_file(sample)
        self.MongoDB.log_file(sample)

    def log_event(self):
        log.warning("Saving log analysis at %s" % (log.baseDir, ))

        with open(os.path.join(log.baseDir, 'analysis.xml'), 'a+') as fd:
            self.MAEC.export(outfile = fd)
            fd.seek(0)
            data = fd.read()
            self.HPFeeds.log_event(data)
            self.MongoDB.log_event(data)

    def log_redirect(self, response):
        if not response:
            return

        redirects = list()
        r         = response

        while r.previous:
            redirects.append(r.previous)
            r = r.previous

        while len(redirects):
            p = redirects.pop()
            self.add_behavior_warn("[HTTP Redirection (Status: %s)] Content-Location: %s --> Location: %s" % (p['status'], 
                                                                                                            p['content-location'], 
                                                                                                            p['location'], ))

    def log_href_redirect(self, referer, url):
        self.add_behavior_warn("[HREF Redirection (document.location)] Content-Location: %s --> Location: %s" % (referer, url, ))    
Example #20
    def __init__(self):
        self.command = Command()
        print(type(self.command))

        self.db = MongoDB()
        self.SPEAKER = 'SPEAKER'
        self.music_cmd = [
            "Playing... - func => playingmusic", "명령 : 1. 그만 | 2. 다음",
            "명령을 확인중...", "다시 말해 주세요.", "재생을 정지합니다."
        ]
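        # Korean prompts above: "Command: 1. Stop | 2. Next",
        # "Checking the command...", "Please say that again.", "Stopping playback."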

        self.filename = [
            'music/Do It.mp3', 'music/noma - Color.mp3', 'music/Sakura.mp3',
            'music/Dawn.mp3', 'music/Tomorrow.mp3'
        ]

        self.music_num = random.randrange(0, 5)

        self.initMixer()
        self.r = sr.Recognizer()
        self.count = 0
        self.count = self.music_num
        print(self.count)
        self.playmusic(self.filename[self.music_num])
Example #21
def get_posts():
    db = MongoDB("facebook")
    search_term = request.args.get('q')
    data = None
    with db.open("posts") as dbpost:
        find_ = re.compile(search_term, re.IGNORECASE)
        data = list(
            dbpost.find({
                "name": "tatnh",
                "content": {
                    "$regex": find_
                }
            }))
    if not data:
        return "NULL"
    list_content = []
    for ct in data:
        if "content" in ct:
            content = ct["content"]
            if search_term.lower() in content.lower():
                list_content.append(
                    {"content": content.encode("utf-8").decode("utf-8")})
    return jsonify({"data": list_content})
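Compiling raw user input as a regex will raise re.error on characters such as ( or *; escaping the search term first is the usual guard (a sketch):

find_ = re.compile(re.escape(search_term), re.IGNORECASE)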
Example #22
def query():
    """
    given {start_time, end_time, aggregation}
    return -> [{time, freq}, ...]
    """
    start_time = datetime.strptime(request.query.get("start_time"), '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime(request.query.get("end_time"), '%Y-%m-%dT%H:%M:%SZ')
    aggregation = request.query.get("aggregation")

    mg = MongoDB()
    mg.connect()

    print("querying time series...")
    results = mg.find( {'created_at': {'$gt': start_time, '$lt': end_time}} )

    results = Grid.get_ts(results, aggregation)

    ret = []
    for result in results:
        ret.append({'start_time': result[0].to_datetime().strftime('%Y-%m-%dT%H:%M:%SZ'),
                    'frequency': result[1].item()})

    response.content_type = 'application/json'
    return json_dumps(ret, indent=2)
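Timestamp.to_datetime() has been removed from pandas; on current versions the loop body would use to_pydatetime() instead (a sketch):

ret.append({'start_time': result[0].to_pydatetime().strftime('%Y-%m-%dT%H:%M:%SZ'),
            'frequency': result[1].item()})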
Example #23
def getDailyPrice(params):
    Data = MongoDB.get(**params)
    if isinstance(Data, list):
        for data in Data:
            del data['_id']
        endDate = max([temp.get('Date') for temp in Data])
        startDate = min([temp.get('Date') for temp in Data])
        return jsonify({
            'ReplyMsg':
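            # Chinese: "Successfully fetched data"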
            f'取得資料成功 {params["Ticker"]} from {startDate} to {endDate}',
            'GridData': Data
        })
    elif isinstance(Data, str):
        return jsonify({
            'ReplyMsg':
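            # Chinese: "Failed to fetch data"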
            f'取得資料失敗 {params["Ticker"]} from {params["StartDate"]} to {params["EndDate"]}',
            'GridData': Data
        })
Example #24
class DBRetriever:
    def __init__(self):
        self.connection = MongoDB('mongodb://localhost:27017/', "mydatabase")
        self.Logger = FileLogger()

    def QueryDatabase(self, collectionName, filter):
        self.Logger.Info('CollectionName: ' + collectionName + ' Query: ' +
                         str(filter))
        cursor = self.connection.QueryCollection(collectionName, filter)
        #cursor = self.connection.LoadAllEntries(collectionName)
        return self.ParseCursor(cursor)

    def ParseCursor(self, cursor):
        cursorList = list(cursor)
        parsedData = []
        for beaconCursor in cursorList:
            beaconJSON = dumps(beaconCursor)
            beaconObject = json.loads(beaconJSON)
            parsedData.append(beaconObject)

        self.Logger.Info('ReturnedData: ' + str(parsedData))
        return parsedData
Example #25
def write_tweets_to_mongo():

    """ load tweets, decide the grid of tweets and insert the tweets into mongo
        this function serves as the initial step of the pipeline. 
        all following db-specific operations are based on the dataset inserted here. 
        This function is only called once for the entire pipeline.
    """

    data_file = "data/sandy_all.txt"
    kml_file = ["data/nj_ct.kml", "data/nyc_ct_sea.kml"]
    # kml_file = ['data/nyc_cb_sea.kml']

    tweets = load_tweets(data_file)
    #     trajectories = tweets_to_trajectories(tweets)

    grid_db = GridDB()
    grid_db.load_grid_from_file(kml_file[0])
    grid_db.load_grid_from_file(kml_file[1])
    #     grid_db.write_grids_to_json('shapefile.json')

    grid_db.check_and_add(tweets)

    #####################################################
    ############## index Tweets into MongoDB ############
    #####################################################

    # mongodb
    mg = MongoDB()
    mg.connect()
    print("connected...")
    mg.drop()

    # tweets to dicts;
    rst = []
    for t in grid_db.get_tweets():
        rst.append(t.to_dict())

    print("inserting...")
    mg.insert_tweets(rst)
Example #26
class ThugLogging(BaseLogging):
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds = HPFeeds()
        self.MAEC    = MAEC(thug_version)
        self.MongoDB = MongoDB()

    def set_url(self, url):
        self.MAEC.set_url(url)
        self.MongoDB.set_url(url)

    def add_behavior_warn(self, description = None, cve = None, method = "Dynamic Analysis"):
        self.MAEC.add_behavior_warn(description, cve, method)

    def add_code_snippet(self, snippet, language, relationship):
        self.MAEC.add_code_snippet(snippet, language, relationship)

    def log_file(self, data, url):
        sample = self.build_sample(data, url)
        if sample is None:
            return
        
        self.HPFeeds.log_file(sample)
        self.MAEC.log_file(sample)
        self.MongoDB.log_file(sample)

    def log_event(self):
        log.info("Saving log analysis at %s" % (log.baseDir, ))

        with open(os.path.join(log.baseDir, 'analysis.xml'), 'a+') as fd:
            self.MAEC.export(outfile = fd)
            fd.seek(0)
            data = fd.read()
            self.HPFeeds.log_event(data)
            self.MongoDB.log_event(data)
Example #27
#!/bin/python

from DataProcessor import DataProcessor
from Utils import Utils
from Logger import Logger
from MongoDB import MongoDB

Utils.createFolderIfNotExists(DataProcessor.TMP_FOLDER)
LOGGER = Logger(DataProcessor.TMP_FOLDER, verbose=False, name='processor')
Utils(DataProcessor.TMP_FOLDER, LOGGER)

LOGGER.info('Starting Data Processor...')
if Utils.runningOnDockerContainer():
    mongo_addr = 'mongo'
else:
    mongo_addr = '127.0.0.1'
mongo = MongoDB(mongo_addr, 27017, LOGGER, user='******', password='******')
processor = DataProcessor(mongo, LOGGER)
mongo.startQueue(id=0)
LOGGER.info('Started Data Processor...OK')
LOGGER.info('Listening on queue as {}'.format(mongo.getQueueConsumerId()))
processor.loopOnQueue()
Example #28
    def __get_collection__(self):
        return MongoDB.db()[self.__collection__]
Example #29
    grid_db.add(tweets)

    return grid_db


if __name__ == "__main__":

    #     write_tweets_to_mongo()
    grid_db = load_tweets_to_grids()
    print(grid_db.get_outlier_grid_ids("H"))
    print("tweets loaded")

    # for grid_id in grid_db.grid_cache:
    #     print(grid_id, grid_db.grid_cache[grid_id].get_ts(grid_db.get_tweets(), 'H'))
    exit()
    mg = MongoDB()
    mg.connect()

    print("querying grid volumes...")

    results = mg.group_by(
        [
            {
                "$match": {
                    "created_at": {
                        "$gt": datetime.strptime("2012-10-15T20:00:02Z", "%Y-%m-%dT%H:%M:%SZ"),
                        "$lt": datetime.strptime("2012-10-22T20:00:02Z", "%Y-%m-%dT%H:%M:%SZ"),
                    }
                }
            }
        ]
    )
Example #30
class update_data(object):
    def __init__(self, update_rank_list):
        self.db = MongoDB(update_rank_list=update_rank_list)
        self.total_rank_list = []
        # self.mid_list = []
        # self.message_room_persentage_dict = {}
        # self.man_chart_dict = {}
        # self.man_status_dict = {}
        # self.radar_dict = {}
        self.huolonglive_tracker = {}
        self.NRS = no_ram_saver()
        # start socket process.. if this works
        python_ws_client(self.db)

        print('Update once when initialized & take a look at time')
        # start_time = time.time()
        # self.whole_data_bundle()
        # self.period_seconds = 500
        # print("--- %s seconds ---" % (self.period_seconds))

    def begin_update_data_periodically(self):
        timerThread_0 = threading.Thread(target=self.timer_func_dual)
        timerThread_0.daemon = True
        print(timerThread_0.daemon)
        timerThread_0.start()

        timerThread = threading.Thread(target=self.timer_func)
        timerThread.daemon = True
        print(timerThread.daemon)
        timerThread.start()

        timerThread_1 = threading.Thread(target=self.timer_func_tri)
        timerThread_1.daemon = True
        print(timerThread_1.daemon)
        timerThread_1.start()

    def timer_func(self):
        # pass
        # next_call = time.time()
        while True:
            # print(f"update data at: {datetime.datetime.now()}")
            # self.whole_data_bundle()
            try:
                print(f"update data at: {datetime.datetime.now()}")
                self.whole_data_bundle()
                time.sleep(60)
            except Exception as error:
                print("*************************************",
                      file=open("log.txt", "a"))
                print('Caught this error from timer_func: ' + repr(error),
                      file=open("log.txt", "a"))
                traceback_str = ''.join(
                    traceback.format_tb(error.__traceback__))
                print('Caught this traceback from timer_func: ' +
                      traceback_str,
                      file=open("log.txt", "a"))
                print("<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>",
                      file=open("log.txt", "a"))

    def timer_func_dual(self):
        # pass
        next_call = time.time()
        while True:
            try:
                self.update_monitor_quickly()
                self.period_seconds = 60
                # print("--- %s seconds ---" % (self.period_seconds))
                print("Finish updating radar")
                next_call = next_call + self.period_seconds
                if next_call - time.time() > 0:
                    time.sleep(next_call - time.time())
            except Exception as error:
                print("*************************************",
                      file=open("log.txt", "a"))
                print('Caught this error from timer_func_dual: ' + repr(error),
                      file=open("log.txt", "a"))
                traceback_str = ''.join(
                    traceback.format_tb(error.__traceback__))
                print('Caught this traceback from timer_func_dual: ' +
                      traceback_str,
                      file=open("log.txt", "a"))
                print("<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>",
                      file=open("log.txt", "a"))

    def timer_func_tri(self):
        next_call = time.time()
        while True:
            try:
                self.db.update_the_original_rank_list()
                self.period_seconds = 3600
                print("Finish updating radar")
                next_call = next_call + self.period_seconds
                if next_call - time.time() > 0:
                    time.sleep(next_call - time.time())
            except Exception as error:
                print("*************************************",
                      file=open("log.txt", "a"))
                print('Caught this error from timer_func_tri: ' + repr(error),
                      file=open("log.txt", "a"))
                traceback_str = ''.join(
                    traceback.format_tb(error.__traceback__))
                print('Caught this traceback from timer_func_tri: ' +
                      traceback_str,
                      file=open("log.txt", "a"))
                print("<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>",
                      file=open("log.txt", "a"))

    def update_monitor_quickly(self):
        'first, calculate rank list'
        self.huolonglive_tracker = self.db.build_huolonglive_tracker()

    def whole_data_bundle(self):
        'Fill in the data bundle'
        # 'Time to update everything~'
        duplicate_update_set = self.db.duplicate_update_set.copy()
        self.db.duplicate_update_set.clear()
        if len(duplicate_update_set) > 0:
            self.db.remove_all_duplicate_elements(duplicate_update_set)

        self.total_rank_list, local_mid_list = self.db.find_total_rank()
        saver_update_set = self.db.saver_update_set.copy()
        # reset the saver_update_list
        self.db.saver_update_set.clear()
        # # 'always make sure there is no duplicate messages'
        # self.db.remove_all_duplicate_elements()
        if len(saver_update_set) > 0:
            for uid in tqdm(saver_update_set):
                if uid in local_mid_list and len(
                        list(self.db.ranking.find({'_id': uid}))) > 0:
                    self.NRS.save_dict(
                        self.db.build_message_room_persentage(uid), uid,
                        "message_room_persentage")
                    self.NRS.save_dict(self.db.build_man_chart(uid), uid,
                                       "man_chart")
                    self.NRS.save_dict(self.db.obtain_man_status(uid), uid,
                                       "man_status")
                    self.NRS.save_dict(self.db.build_radar_chart(uid), uid,
                                       "radar")
                    self.NRS.save_dict(self.db.get_all_danmaku(uid), uid,
                                       "all_danmaku")
                # else:
                #     print("*************************************", file=open("log.txt", "a"))
                #     print('Caught this error from whole data bundle: ' + f"uid:{uid}", file=open("log.txt", "a"))
                #     if_in_local_mid_list = uid in local_mid_list
                #     if_in_duplicate_list = uid in duplicate_update_set
                #     print("if_in_local_mid_list:" + f"{if_in_local_mid_list}" + f"if_in_duplicate_list: {if_in_duplicate_list}", file=open("log.txt", "a"))
                #     print(f"finding rank length: {len(list(self.db.ranking.find({'_id': uid})))}", file=open("log.txt", "a"))
                #     print("<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>", file=open("log.txt", "a"))

        #     self.message_room_persentage_dict[uid] = self.db.build_message_room_persentage(uid)
        #     self.man_chart_dict[uid] = self.db.build_man_chart(uid)
        #     self.man_status_dict[uid] = self.db.obtain_man_status(uid)
        #     self.radar_dict[uid] = self.db.build_radar_chart(uid)
        # self.db.build_basic_message_sets()
        print("everything get updated")

    def get_total_message(self, uid):
        loaded_data = self.NRS.load_dict(uid, "all_danmaku")
        return {'data': loaded_data, 'roomid_list': list(loaded_data.keys())}
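Each error branch in the timer functions above re-opens log.txt without ever closing it; the standard logging module is the usual replacement (a sketch, not the original project's approach):

import logging

logging.basicConfig(filename='log.txt', level=logging.ERROR)

def run_safely(step, name):
    try:
        step()
    except Exception:
        # logging.exception records the message plus the full traceback
        logging.exception('Caught an error from %s', name)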
Example #31
        return tweet

def processQuote(tweet, isRT):
    pass

def insertTweet (tweet):
    pass

"""
tweet = getTweet('917946195410128897')
print(sys.getdefaultencoding())
print (tweet.count())
for t in tweet:
    print (t["text"])
"""
db = MongoDB(mongoDB)
RTcount = 0
quoteCount = 0
tweets = db.find(MongoDB.TWEETS_COLLECTION) #getAllTweet()
rts = db.find(MongoDB.TWEETS_COLLECTION, {"retweeted_status": { "$exists" : True}}) #getRTs()
quotes = getQuotes()
originals = getOriginals()
replys = getReplys()

for t in tweets:
    isRT = False
    if CONST_RT in t:
        isRT = True
        RTcount += 1
        processRT(t)
    
Example #32
    def __init__(self):
        # MEMBERS
        # self.motor = MotorControl() #servo motor
        self.db = MongoDB()
        self.lock = threading.Lock()
        self.timedCaptureBool = False
        self.threadInProgress = threading.Event()
        # GUI ELEMENTS
        self.root = Tk()
        self.root.title('ZRecognition')
        self.root.geometry('800x600')
        self.root.resizable(False, False)
        # FRAME DEFINITIONS
        self.imageFrame = Frame(self.root, padx=5, pady=5)
        self.inputFrame = Frame(self.root, padx=5, pady=5)
        self.logFrame = Frame(self.root, padx=5, pady=5)
        # WIDGET DEFINITIONS
        self.canvasColor = Canvas(self.imageFrame)
        self.canvasDigi = Canvas(self.imageFrame)
        self.manualCapture = Button(
            self.inputFrame,
            text='Manual Capture',
            height=1,
            command=lambda: threading.Thread(target=self.ManualCapture).start()
            if not self.threadInProgress.is_set() else None)
        self.timedCapture = Button(
            self.inputFrame,
            text='Timed Capture',
            height=1,
            command=lambda: threading.Thread(target=self.TimedCapture).start()
            if not self.threadInProgress.is_set() else None)
        self.stopCapture = Button(self.inputFrame,
                                  text='Stop Timed Capture',
                                  height=1,
                                  command=self.StopTimer)
        self.timedCaptureTimer = Text(self.inputFrame, width=10, height=2)
        self.log = sctx.ScrolledText(self.logFrame, height=10, state=DISABLED)
        self.var = IntVar(self.inputFrame)
        self.var.set(3)  # initial value
        self.option = OptionMenu(self.inputFrame, self.var, 3, 4, 5, 6, 7)
        # PLACING ALL ELEMENTS INTO FORM
        # IMAGE FRAME
        self.imageFrame.grid(row=0, column=0, sticky=N + S + E + W)
        self.canvasColor.grid(row=0, column=0, sticky=N + S)
        self.canvasDigi.grid(row=0, column=1, sticky=N + S)
        # INPUT FRAME
        self.inputFrame.grid(row=1, column=0, sticky=N + S + E + W)
        self.manualCapture.grid(row=0, column=0, sticky=N + S + E + W)
        self.timedCapture.grid(row=0, column=1, sticky=N + S + E + W)
        self.stopCapture.grid(row=1, column=0, sticky=N + S + E + W)
        self.option.grid(row=1, column=1)
        # LOG FRAME
        self.logFrame.grid(row=2, column=0, sticky=N + S + E + W)
        self.log.grid(row=0, column=0, sticky=N + S + E + W)
        # GRID WEIGHT FOR EXPANDING
        # MAIN FRAME
        temp = self.root.grid_size()
        for r in range(temp[1]):  # ROWS
            Grid.rowconfigure(self.root, r, weight=1)
        for c in range(temp[0]):
            Grid.columnconfigure(self.root, c, weight=1)
        # CANVAS FRAME
        temp = self.imageFrame.grid_size()
        for r in range(temp[1]):  # ROWS
            Grid.rowconfigure(self.imageFrame, r, weight=1)
        for c in range(temp[0]):
            Grid.columnconfigure(self.imageFrame, c, weight=1)
        # INPUT FRAME
        temp = self.inputFrame.grid_size()
        for r in range(temp[1]):  # ROWS
            Grid.rowconfigure(self.inputFrame, r, weight=1)
        for c in range(temp[0]):
            Grid.columnconfigure(self.inputFrame, c, weight=1)
        # LOG FRAME
        Grid.rowconfigure(self.logFrame, 0, weight=1)
        Grid.columnconfigure(self.logFrame, 0, weight=1)
Example #33
class ThugLogging(BaseLogging):
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds        = HPFeeds()
        self.MAEC           = MAEC(thug_version)
        self.MongoDB        = MongoDB()
        self.JSONLog        = JSONLog(thug_version)
        self.baseDir        = None
        self.shellcodes     = set()
        self.shellcode_urls = set()

    def set_url(self, url):
        self.HPFeeds.set_url(url)
        self.MAEC.set_url(url)
        self.MongoDB.set_url(url)
        self.JSONLog.set_url(url)

    def add_behavior_warn(self, description = None, cve = None, method = "Dynamic Analysis"):
        self.MAEC.add_behavior_warn(description, cve, method)
        self.JSONLog.add_behavior_warn(description, cve, method)

    def add_code_snippet(self, snippet, language, relationship, method = "Dynamic Analysis"):
        self.MAEC.add_code_snippet(snippet, language, relationship, method)
        self.JSONLog.add_code_snippet(snippet, language, relationship, method)

    def log_file(self, data, url):
        sample = self.build_sample(data, url)
        if sample is None:
            return
        
        self.HPFeeds.log_file(sample)
        self.MAEC.log_file(sample)
        self.MongoDB.log_file(copy.deepcopy(sample))
        self.JSONLog.log_file(sample)

    def log_event(self):
        log.warning("Saving log analysis at %s" % (self.baseDir, ))

        with open(os.path.join(self.baseDir, 'analysis.xml'), 'a+') as fd:
            self.MAEC.export(outfile = fd)
            fd.seek(0)
            data = fd.read()
            self.HPFeeds.log_event(data)
            self.MongoDB.log_event(data)
            self.JSONLog.export(self.baseDir)

    def log_connection(self, source, destination, method, flags = {}):
        """
        Log the connection (redirection, link) between two pages

        @source         The origin page
        @destination    The page the user is made to load next
        @method         Link, iframe, .... that moves the user from source to destination
        @flags          Additional information flags. Existing are: "exploit"
        """

        self.JSONLog.log_connection(source, destination, method, flags)

    def log_location(self, url, ctype, md5, sha256, flags = {}, fsize = 0, mtype = ""):
        """
        Log file information for a given url

        @url            Url we fetched this file from
        @ctype          Content type (whatever the server says it is)
        @md5            MD5 hash
        @sha256         SHA256 hash
        @fsize          File size
        @mtype          Calculated mime type
        """
        self.JSONLog.log_location(url, ctype, md5, sha256, flags = flags, fsize = fsize, mtype = mtype)

    def log_exploit_event(self, url, module, description, cve = None, data = None, forward = True):
        """
        Log exploit event information for a given url

        @url            Url where this exploit occurred
        @module         Module/ActiveX Control, ... that gets exploited
        @description    Description of the exploit
        @cve            CVE number (if available)
        @forward        Forward log to add_behavior_warn
        """
        if forward:
            self.add_behavior_warn("[%s] %s" % (module, description, ), cve = cve)

        self.JSONLog.log_exploit_event(url, module, description, cve = cve, data = data)

    def log_warning(self, data):
        log.warning(data)
        self.HPFeeds.log_warning(data)

    def log_redirect(self, response):
        if not response:
            return

        redirects = list()
        r         = response

        last = None
        final = response['content-location']
        while r.previous:
            redirects.append(r.previous)
            r = r.previous

        while len(redirects):
            p = redirects.pop()
            self.add_behavior_warn("[HTTP Redirection (Status: %s)] Content-Location: %s --> Location: %s" % (p['status'], 
                                                                                                            p['content-location'], 
                                                                                                            p['location'], ))
            self.log_connection(p['content-location'], p['location'],"http-redirect")
            last = p['location']
        if last:
            self.log_connection(last, final, "http-redirect")

    def log_href_redirect(self, referer, url):
        self.add_behavior_warn("[HREF Redirection (document.location)] Content-Location: %s --> Location: %s" % (referer, url, ))
        self.log_connection(referer, url, "href")

    def set_basedir(self, url):
        if self.baseDir:
            return

        t = datetime.datetime.now()
        m = hashlib.md5()
        m.update(url)

        base = os.getenv('THUG_LOGBASE', '..')
        self.baseDir = os.path.join(base, 'logs', m.hexdigest(), t.strftime("%Y%m%d%H%M%S"))

        try:
            os.makedirs(self.baseDir)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        with open(os.path.join(base, 'logs', 'thug.csv'), 'a+') as fd:
            csv_line = '%s,%s\n' % (m.hexdigest(), url, )
            fd.seek(0)
            for l in fd.readlines():
                if l == csv_line:
                    return

            fd.write(csv_line)

    def set_absbasedir(self, basedir):
        self.baseDir = basedir

        try:
            os.makedirs(self.baseDir)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise
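Note that set_basedir calls m.update(url) with a str, which only works on Python 2; under Python 3, hashlib digests require bytes:

m.update(url.encode('utf-8'))  # required on Python 3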
Example #34
#!/bin/python

from DataProcessor import DataProcessor
from Utils import Utils
from Logger import Logger
from MongoDB import MongoDB

Utils.createFolderIfNotExists(DataProcessor.TMP_FOLDER)
LOGGER = Logger(DataProcessor.TMP_FOLDER, verbose=True, name='processor')
Utils(DataProcessor.TMP_FOLDER, LOGGER)

mongo = MongoDB('127.0.0.1', 27017, LOGGER, user='******', password='******')
mongo.startQueue(id=0)
print(mongo.getQueueConsumerId())

processor = DataProcessor(mongo, LOGGER)

processor.filterAndNormalizeFullDataset()
Example #35
    def __init__(self):
        self.connection = MongoDB('mongodb://localhost:27017/', "mydatabase")
        self.Logger = FileLogger()
Example #36
class music:
    def __init__(self):
        self.command = Command()
        print(type(self.command))

        self.db = MongoDB()
        self.SPEAKER = 'SPEAKER'
        self.music_cmd = [
            "Playing... - func => playingmusic", "명령 : 1. 그만 | 2. 다음",
            "명령을 확인중...", "다시 말해 주세요.", "재생을 정지합니다."
        ]
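        # Korean prompts above: "Command: 1. Stop | 2. Next",
        # "Checking the command...", "Please say that again.", "Stopping playback."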

        self.filename = [
            'music/Do It.mp3', 'music/noma - Color.mp3', 'music/Sakura.mp3',
            'music/Dawn.mp3', 'music/Tomorrow.mp3'
        ]

        self.music_num = random.randrange(0, 5)

        self.initMixer()
        self.r = sr.Recognizer()
        self.count = 0
        self.count = self.music_num
        print(self.count)
        self.playmusic(self.filename[self.music_num])

    def playmusic(self, soundfile):
        pygame.init()
        pygame.mixer.init()
        self.clock = pygame.time.Clock()
        pygame.mixer.music.load(soundfile)
        pygame.mixer.music.play()

        while pygame.mixer.music.get_busy():
            #print("Playing... - func => playingmusic")
            print(self.music_cmd[0])

            self.clock.tick(1000)  # cap the loop at 1000 frames per second

            with sr.Microphone() as source:
                self.r.adjust_for_ambient_noise(source)
                print("%s번째 곡 : %s" %
                      ((self.count % 5) + 1, self.filename[self.count % 5]))

                #print("명령 : 1. 그만 | 2. 다음")
                print(self.music_cmd[1])

                self.audio_text = self.r.listen(source)
                try:
                    #print("명령을 확인중...")
                    print(self.music_cmd[2])

                    r2 = self.r.recognize_google(self.audio_text,
                                                 language='ko-KR')
                    print(r2)

                    if self.command.STOP in r2:
                        self.stopmusic()
                        self.db.insert_command_one(self.command.STOP, '',
                                                   self.SPEAKER)

                    elif self.command.NEXT in r2:
                        self.count += 1
                        self.playmusic(self.filename[self.count % 5])
                        self.db.insert_command_one(self.command.NEXT, '',
                                                   self.SPEAKER)

                except KeyboardInterrupt:
                    self.stopmusic()
                    print("\nPlay stopped by user")

                except:
                    #print("다시 말해 주세요.")
                    print(self.music_cmd[3])
                    print(sys.exc_info())

    def stopmusic(self):
        """stop currently playing music"""
        #print("재생을 정지합니다.")
        print(self.music_cmd[4])
        pygame.mixer.music.stop()

    def getmixerargs(self):
        pygame.mixer.init()
        freq, size, chan = pygame.mixer.get_init()
        return freq, size, chan

    def initMixer(self):
        BUFFER = 3072  # audio buffer size, number of samples since pygame 1.8.
        FREQ, SIZE, CHAN = self.getmixerargs()
        pygame.mixer.init(FREQ, SIZE, CHAN, BUFFER)
Example #37
# Conexion for Mongodb Atlas
from MongoDB import MongoDB
connector = MongoDB(
    "mongodb://*****:*****@db-argentum-shard-00-00-8pr3z.gcp.mongodb.net:27017,db-argentum-shard-00-01-8pr3z.gcp.mongodb.net:27017,db-argentum-shard-00-02-8pr3z.gcp.mongodb.net:27017/db-mechanicus?ssl=true&replicaSet=DB-Argentum-shard-0&authSource=admin&retryWrites=true",
    'db-mechanicus')
Example #38
from flask import *
from flask_cors import *
from MongoDB import MongoDB

# runtime configurations
HOST = 'localhost'
PORT = 5000
m = MongoDB(url="localhost:27017", database="college", doc="student")

app = Flask(__name__)


@app.route('/', methods=['GET'])
def get_students():
    print(m.size())
    val = jsonify(m.find_all())
    return val


@app.route('/<id>', methods=['GET'])
def get_student_by_id(id):
    val = m.find_by_id(id)
    return val


@app.route('/', methods=['POST'])
def add_student():
    student = request.get_json()
    m.add(student)
    return student
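The snippet configures HOST and PORT but never starts the server; presumably the module ends with something like:

if __name__ == '__main__':
    app.run(host=HOST, port=PORT)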
Example #39
    def search_images_for_place(place_id: int,
                                limit: int = 1000,
                                batch_size: int = 50,
                                common_limit: bool = False,
                                only_new_posts: bool = False):
        images_collection = MongoDB.db()['images']
        place = MongoDB.db()['places'].find_one({'uid': place_id})
        if not place:
            raise NameError("Place not found")
        real_place_id = place['_id']
        max_id = None
        min_id = None
        if only_new_posts:
            try:
                max_id = images_collection.find({
                    'place_id': place['_id']
                }).sort([('uid', -1)]).limit(1).next()['uid']
            except StopIteration:
                max_id = None
        else:
            try:
                min_id = images_collection.find({
                    'place_id': place['_id']
                }).sort([('uid', 1)]).limit(1).next()['uid']
                print('Place_id=%d continue from %d' %
                      (int(place_id), int(min_id)))
            except StopIteration:
                min_id = None
        if common_limit is True:
            place_images_quantity = images_collection.count(
                {'place_id': place['_id']})
            limit = max(limit - place_images_quantity, 0)
        images, _ = DataLoader.search_images_for_location(place_id,
                                                          limit,
                                                          batch_size,
                                                          cursor=min_id,
                                                          min_id=max_id)
        if images is False:
            return False
        for image in images:
            if images_collection.count() == 0 or \
                    not images_collection.find_one({'uid': image['id']}):
                images_collection.insert_one({
                    'place_id': real_place_id,
                    'uid': image['id'],
                    'shortcode': image['shortcode' if 'shortcode' in image else 'code'],
                    'image_url': image['display_url' if 'display_url' in image else 'display_src'],
                    'thumbnail_url': image['thumbnail_src'],
                    'class': None,
                    'accuracy': None
                })
Example #40
class ThugLogging(BaseLogging):
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds = HPFeeds()
        self.MAEC = MAEC(thug_version)
        self.MongoDB = MongoDB()
        self.shellcodes = set()

    def set_url(self, url):
        self.MAEC.set_url(url)
        self.MongoDB.set_url(url)

    def add_behavior_warn(self,
                          description=None,
                          cve=None,
                          method="Dynamic Analysis"):
        self.MAEC.add_behavior_warn(description, cve, method)

    def add_code_snippet(self,
                         snippet,
                         language,
                         relationship,
                         method="Dynamic Analysis"):
        self.MAEC.add_code_snippet(snippet, language, relationship, method)

    def log_file(self, data, url):
        sample = self.build_sample(data, url)
        if sample is None:
            return

        self.HPFeeds.log_file(sample)
        self.MAEC.log_file(sample)
        self.MongoDB.log_file(sample)

    def log_event(self):
        log.warning("Saving log analysis at %s" % (log.baseDir, ))

        with open(os.path.join(log.baseDir, 'analysis.xml'), 'a+') as fd:
            self.MAEC.export(outfile=fd)
            fd.seek(0)
            data = fd.read()
            self.HPFeeds.log_event(data)
            self.MongoDB.log_event(data)

    def log_redirect(self, response):
        if not response:
            return

        redirects = list()
        r = response

        while r.previous:
            redirects.append(r.previous)
            r = r.previous

        while len(redirects):
            p = redirects.pop()
            self.add_behavior_warn(
                "[HTTP Redirection (Status: %s)] Content-Location: %s --> Location: %s"
                % (
                    p['status'],
                    p['content-location'],
                    p['location'],
                ))

    def log_href_redirect(self, referer, url):
        self.add_behavior_warn(
            "[HREF Redirection (document.location)] Content-Location: %s --> Location: %s"
            % (
                referer,
                url,
            ))
Example #41
    def __init__(self):
        self.connection = MongoDB('mongodb://localhost:27017/', "mydatabase")
Example #42
class ThugLogging(BaseLogging):
    def __init__(self, thug_version):
        BaseLogging.__init__(self)

        self.HPFeeds        = HPFeeds()
        self.MAEC           = MAEC(thug_version)
        self.MongoDB        = MongoDB()
        self.baseDir        = None
        self.shellcodes     = set()
        self.shellcode_urls = set()

    def set_url(self, url):
        self.MAEC.set_url(url)
        self.MongoDB.set_url(url)

    def add_behavior_warn(self, description = None, cve = None, method = "Dynamic Analysis"):
        self.MAEC.add_behavior_warn(description, cve, method)

    def add_code_snippet(self, snippet, language, relationship, method = "Dynamic Analysis"):
        self.MAEC.add_code_snippet(snippet, language, relationship, method)

    def log_file(self, data, url):
        sample = self.build_sample(data, url)
        if sample is None:
            return
        
        self.HPFeeds.log_file(sample)
        self.MAEC.log_file(sample)
        self.MongoDB.log_file(sample)

    def log_event(self):
        log.warning("Saving log analysis at %s" % (self.baseDir, ))

        with open(os.path.join(self.baseDir, 'analysis.xml'), 'a+') as fd:
            self.MAEC.export(outfile = fd)
            fd.seek(0)
            data = fd.read()
            self.HPFeeds.log_event(data)
            self.MongoDB.log_event(data)

    def log_redirect(self, response):
        if not response:
            return

        redirects = list()
        r         = response

        while r.previous:
            redirects.append(r.previous)
            r = r.previous

        while len(redirects):
            p = redirects.pop()
            self.add_behavior_warn("[HTTP Redirection (Status: %s)] Content-Location: %s --> Location: %s" % (p['status'], 
                                                                                                            p['content-location'], 
                                                                                                            p['location'], ))

    def log_href_redirect(self, referer, url):
        self.add_behavior_warn("[HREF Redirection (document.location)] Content-Location: %s --> Location: %s" % (referer, url, ))


    def set_basedir(self, url):
        if self.baseDir:
            return

        t = datetime.datetime.now()
        m = hashlib.md5()
        m.update(url)

        base = os.getenv('THUG_LOGBASE', '..')
        self.baseDir = os.path.join(base, 'logs', m.hexdigest(), t.strftime("%Y%m%d%H%M%S"))

        try:
            os.makedirs(self.baseDir)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        with open(os.path.join(base, 'logs', 'thug.csv'), 'a+') as fd:
            csv_line = '%s,%s\n' % (m.hexdigest(), url, )
            fd.seek(0)
            for l in fd.readlines():
                if l == csv_line:
                    return

            fd.write(csv_line)

    def set_absbasedir(self, basedir):
        self.baseDir = basedir

        try:
            os.makedirs(self.baseDir)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise
Example #43
    def search_places_for_city(city_id: str = None,
                               limit: int = None,
                               use_last_cursor: bool = True):
        cursor_property = 'places_search_cursor'
        city_collection = MongoDB.db()['cities']
        places_collection = MongoDB.db()['places']
        query = {}

        if city_id is not None:
            query['_id'] = bson.objectid.ObjectId(city_id)

        for city in city_collection.find(query):
            cursor = None
            if use_last_cursor and cursor_property in city:
                if city[cursor_property] is False:
                    continue
                cursor = city[cursor_property]
            print('Start searching places for %s' % city['name'])
            counter = 0
            searcher = Facebook.PlaceSearcherByCoordinates(
                latitude=city['latitude'],
                longitude=city['longitude'],
                distance=20000,
                fields=['name', 'location', 'category'],
                next_cursor=cursor,
                categories=[
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_ARTS_ENTERTAINMENT,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_EDUCATION,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_FITNESS_RECREATION,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_HOTEL_LODGING,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_MEDICAL_HEALTH,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_SHOPPING_RETAIL,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_TRAVEL_TRANSPORTATION,
                    Facebook.PlaceSearcherByCoordinates.CATEGORY_FOOD_BEVERAGE
                ])
            while limit is None or counter < limit:
                places, cursor = searcher.next()
                for place in places:
                    if not places_collection.find_one({'uid': place.uid}):
                        place_city_id = city['_id']
                        if place.is_city():
                            place_city = city_collection.find_one(
                                {'uid': place.uid})
                            if not place_city:
                                place_city_id = city_collection.insert_one({
                                    'city_id': place.uid,
                                    'uid': place.uid,
                                    'name': place.name,
                                    'city_local_name': place.name,
                                    'latitude': place.latitude,
                                    'longitude': place.longitude
                                }).inserted_id
                            else:
                                place_city_id = place_city['_id']
                        place = place.__dict__
                        place['city_id'] = place_city_id
                        place['location'] = {
                            'type': 'Point',
                            'coordinates': [place['longitude'], place['latitude']]
                        }
                        places_collection.insert_one(place)
                        print("Place `%s` was added to `%s`" %
                              (place['name'], city['name']))
                        counter += 1
                city_collection.update_one({'_id': city['_id']},
                                           {'$set': {
                                               cursor_property: cursor
                                           }})
                print('Found %d places of %d' %
                      (counter, limit if limit is not None else -1))
                if cursor is False:
                    break
Example #44
    def wrapper(func, *args, **kwargs):
        mongo = MongoDB()
        mongo.connect(app.config['app.mongohost'])