Example #1
    def api_economy(self):
        print('Executing api_economy')
        gdp_india = {}
        for record in self.data['records']:
            gdp = dict()

            # taking out yearly GDP value from records
            gdp['GDP_in_rs_cr'] = int(
                record['gross_domestic_product_in_rs_cr_at_2004_05_prices'])
            gdp_india[record['financial_year']] = gdp

        # collect the financial years once the dict has been fully built
        gdp_india_yrs = list(gdp_india)

        # start from the second year; growth needs a previous year to compare against
        for i in range(1, len(gdp_india_yrs)):
            key = 'GDP_Growth_' + gdp_india_yrs[i]
            # calculating GDP growth on yearly basis
            gdp_india[gdp_india_yrs[i]][key] = round(
                ((gdp_india[gdp_india_yrs[i]]['GDP_in_rs_cr'] -
                  gdp_india[gdp_india_yrs[i - 1]]['GDP_in_rs_cr']) /
                 gdp_india[gdp_india_yrs[i - 1]]['GDP_in_rs_cr']) * 100, 2)

        # convert to pandas dataframe
        gdp_india = pd.DataFrame(list(gdp_india.items()),
                                 columns=['financial_year', 'gdp_growth'])

        # connect to mongodb
        mongodb_obj = MongoDB('etluser', 'etluser', 'localhost', 'GDP')
        mongodb_obj.insert_into_db(gdp_india, 'India_GDP')
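Example #1 relies on a MongoDB helper that is not shown. A minimal sketch, assuming pymongo and inferring the interface only from the calls MongoDB('etluser', 'etluser', 'localhost', 'GDP') and insert_into_db(dataframe, collection_name); the real class may differ:

# Hypothetical sketch of the MongoDB helper used above (not the project's actual class).
from pymongo import MongoClient


class MongoDB:
    def __init__(self, user, password, host, db_name, port=27017):
        # Authenticated connection string; adjust authSource etc. as needed.
        uri = "mongodb://%s:%s@%s:%d/%s" % (user, password, host, port, db_name)
        self._client = MongoClient(uri)
        self._db = self._client[db_name]

    def insert_into_db(self, dataframe, collection_name):
        # Convert each DataFrame row into a document and bulk-insert.
        records = dataframe.to_dict(orient="records")
        if records:
            self._db[collection_name].insert_many(records)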
Example #2
    def __init__(self, tab_images):
        super(Collector, self).__init__()
        self._lock = threading.RLock()

        self._db = MongoDB()
        self._thread_stop = False
        self._images = []
        self._null_times = 0
        self._read_pos = -1
        self._write_pos = -1
        self._tab_images = tab_images
        self._max_size = int(
            tools.get_conf_value('../config.conf', "image_collector",
                                 "max_size"))
        self._interval = int(
            tools.get_conf_value('../config.conf', "image_collector",
                                 "sleep_time"))
        self._allowed_null_times = int(
            tools.get_conf_value('../config.conf', "image_collector",
                                 'allowed_null_times'))
        self._image_count = int(
            tools.get_conf_value('../config.conf', "image_collector",
                                 "images_count"))

        # On startup, reset any tasks still marked as DOING back to TODO
        self._db.update(self._tab_images,
                        {'image_pron_status': Constance.DOING},
                        {'image_pron_status': Constance.TODO})
        self._db.set_ensure_index(self._tab_images, 'image_pron_status')

        self._finished_callback = None
Example #3
class ImagePornControl(threading.Thread):
    def __init__(self, collector, tab_images):
        super(ImagePornControl, self).__init__()

        self._collector = collector
        self._tab_images = tab_images

        self._deal_image_count = int(
            tools.get_conf_value('config.conf', "image_porn",
                                 "deal_image_count"))
        self._interval = int(
            tools.get_conf_value('config.conf', "image_porn", "sleep_time"))

        self._db = MongoDB()
        self._image_porn_recg = ImagePornRecg()

    def run(self):
        while True:
            # check whether the collector has finished
            if self._collector.is_finished():
                break

            images = self._collector.get_images(self._deal_image_count)

            for image in images:
                try:
                    image_url = image['image_url']

                    sexy_image_status = []  # detection result
                    sexy_image_url = []  # corresponding image url

                    try:
                        result = self._image_porn_recg.image_predict(image_url)
                    except Exception as e:
                        log.debug(e)
                        # prediction failed; skip this image so `result` is never used unset
                        continue
                    else:
                        log.debug('''
                            image_url :%s
                            result    :%d
                            ''' % (image_url, result))

                    sexy_image_status = 1 if result == 6 else 5
                    sexy_image_url = image_url

                    self._db.update(self._tab_images, {'_id': image['_id']}, {
                        'image_pron_status': Constance.DONE,
                        'sexy_image_status': sexy_image_status,
                        'sexy_image_url': sexy_image_url
                    })

                except Exception as e:
                    raise
                    # log.error('%s table has no image_url field' % self._tab_images)

            time.sleep(self._interval)

        self.finished()

    def finished(self):
        self._image_porn_recg.close()
Example #4
def test_insertar_elemento_incorrecto():
    """Test 6: intento fallido de insertar una nueva mascota puesto que alguno
        de los parámetros para establecer la conexión no son correctos."""
    nueva_mascota = {
        'id': '1',
        'nombre': 'Simba',
        'tipo_animal': 'cat',
        'raza': 'angora',
        'tamanio': 'small',
        'genero': 'male',
        'edad': 'young',
        'tipo_pelaje': 'short',
        'estado': 'adoptable',
        'ninios': 'no',
        'gatos': 'yes',
        'perros': 'no',
        'ciudad': 'Granada',
        'pais': 'España'
    }
    """Para ello modificamos la conexión a la base de datos para que sea incorrecta."""
    conexion_incorrecta = MongoDB(os.environ.get("MONGODB_URI"), 'PetfinderBD',
                                  'mascotas')
    conexion_incorrecta.coleccion = None
    with pytest.raises(CollectionNotFound):
        assert conexion_incorrecta.insertar_elemento(nueva_mascota)
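The test relies on insertar_elemento raising CollectionNotFound when the collection handle is missing. A hypothetical sketch of that behaviour, with names mirroring the test (the real implementation may differ):

from pymongo import MongoClient


class CollectionNotFound(Exception):
    """Raised when the target collection is not available."""


class MongoDB:
    def __init__(self, uri, db_name, collection_name):
        cliente = MongoClient(uri)
        self.coleccion = cliente[db_name][collection_name]

    def insertar_elemento(self, documento):
        # The test sets `coleccion = None` to simulate a broken connection,
        # so guard before touching the driver.
        if self.coleccion is None:
            raise CollectionNotFound("Collection is not initialised")
        return self.coleccion.insert_one(documento).inserted_id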
Example #5
def extract_and_upload_text_from_images(bucket_name, tiff_documents_list, filing_type):

    print("Length of mini_tiff_documents_list:", len(tiff_documents_list))

    aws_s3_sdk_controller = AwsS3SdkController()
    mini_thread_postgresql_client = PostgreSQLClient()
    mongodb_client = MongoDB()

    for json_object in tiff_documents_list:
        try:
            document_id = json_object.get("document_id")

            # print(json_object.get("tiff_document_name"))
            # print("Document ID:", document_id)

            tiff_document = aws_s3_sdk_controller.download_specific_s3_file(bucket_name,
                                                                            json_object.get("tiff_document_name"))
            extracted_string = string_extracton_v3.run_string_extraction(tiff_document, filing_type)

            mongodb_client.insert_document_into_database(document_id, extracted_string)

            mini_thread_postgresql_client.update_mysql_document_tracking(document_id)

        except Exception as error:
            print("[ERROR]  Tiff File Name:", json_object.get("tiff_document_name"))
            print("[ERROR]  Document ID:", document_id)
            print("[ERROR]  extract_and_upload_text_from_images", error)
Example #6
    def time(self, update, context):
        # Handles convo cancellation
        if update.message.text == "/cancel":
            context.bot.send_message(
                update.effective_chat.id,
                "You have stopped scheduling for a reminder.")
            return ConversationHandler.END

        # check that the message is exactly 4 digits (HHMM, 24-hour format)
        message_time = update.message.text

        if len(message_time) != 4 or not message_time.isdigit():
            context.bot.send_message(
                update.effective_chat.id,
                "Please check that you have entered 4 numbers.")

            return self.TIME

        hour = message_time[0:2]
        minute = message_time[2:4]

        # Check if date is current date or later including the time
        current_date = datetime.now()
        input_date = datetime(int(self.year_val), int(self.month_val),
                              int(self.day_val), int(hour), int(minute))

        if current_date > input_date:
            context.bot.send_message(
                update.effective_chat.id,
                "You have entered a time in the past. Please re-enter the time(24 hours)."
            )

            return self.TIME

        # store hour and minute for display
        self.hour_val = hour
        self.minute_val = minute

        # store the time in memory before writing into the database
        self.time_val = message_time

        reply_message = "Description: {0}\nDate(Day/Month/Year): {1}/{2}/{3}\nTime(hh:mm): {4}:{5}".format(
            self.description_val, self.day_val, self.month_val, self.year_val,
            self.hour_val, self.minute_val)
        context.bot.send_message(update.effective_chat.id, reply_message)

        # Init mongodb connection
        db = MongoDB('heroku_mqncqpgt', 'reminders')
        db.insertonedb({
            "chatid": update.message.chat.id,
            "description": self.description_val,
            "date": self.date_val,
            "time": self.time_val
        })

        return ConversationHandler.END
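The bot only needs insertonedb here (plus finddb and deleteonedb in the scheduler example further below), so a thin pymongo wrapper along these hypothetical lines would suffice; the actual class is not shown:

# Hypothetical sketch of the MongoDB wrapper the reminder examples call into.
from pymongo import MongoClient


class MongoDB:
    def __init__(self, db_name, collection_name, uri="mongodb://localhost:27017"):
        # The URI default is an assumption; the real app likely reads it from config.
        self._collection = MongoClient(uri)[db_name][collection_name]

    def insertonedb(self, document):
        return self._collection.insert_one(document).inserted_id

    def finddb(self, query):
        return self._collection.find(query)

    def deleteonedb(self, query):
        return self._collection.delete_one(query)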
Example #7
 def __init__(self):
     conf = Configuration()
     self.ptext = TextProcess(conf)
     self.ds = DataSet(conf)
     self.mongo = MongoDB(self.ds.db, self.ds.collection)
     self.tweet = ""
     self.tokens = ""
     self.i = 0
     self.enable_translation = self.ptext.translation
     self.translation_store = self.ptext.translation_store
Example #8
    def deal_item(self, data):
        rumors = data["results"]
        mongo = MongoDB(MONGODB_URI, "rumors")

        for rumor in rumors:
            rumor_id = generate_hash("{}{}".format(rumor["title"],
                                                   rumor["rumorType"]))
            rumor.update({"_id": rumor_id, "source": "丁香园", "agency": "丁香园"})
            if self.url_repeat(rumor_id) is False and mongo.insert(rumor):
                self.update_filter_queue(rumor_id)
Example #9
def fetch(page):
    db = MongoDB()

    uuid = get_uuid()
    token = CreatToken(page).get_token()
    params = {
        'cityName': cityName,
        'cateId': type_,
        'areaId': '0',
        'sort': '',
        'dinnerCountAttrId': '',
        'page': page,
        'userId': '',
        'uuid': uuid,
        'platform': '1',
        'partner': '126',
        'originUrl': originUrl + 'pn{}/'.format(page),
        'riskLevel': '1',
        'optimusCode': '1',
        '_token': token
    }

    res = requests.get(base_url, params=params, headers=HEADERS)
    result = json.loads(res.text)
    items = result['data']['poiInfos']
    for item in items:
        # print(store)
        store = parse_store(item)
        # db.save(store)

        poiId = store['poiId']
        commentCount = store['allCommentNum']
        max_page = math.ceil(int(commentCount) / 10)
        comment_list = []
        for offset in range(max_page):
            params = {
                'uuid': get_uuid(),
                'id': poiId,
                'userId': '2490983615',
                'offset': offset * 10,
                'pageSize': '10',
            }

            resp = requests.get(comment_url, params=params, headers=HEADERS)
            # print(resp.text)
            result = json.loads(resp.text)
            comments = result['data']['comments']
            for comment_item in comments:
                comment = parse_comment(comment_item)
                print(comment)
                comment_list.append(comment)
        store['comment'] = comment_list
        print(store)
        db.save(store)
Example #10
 def __init__(self, *args, **kwargs):
     self.location_name = location_name = kwargs.get("location_name")
     self.name = location_name + "_detailed"
     self.location = location = LocationManager().get_location(location_name)
     self.document_ids_ready_for_processing = []
     self.detailed_collection = MongoDB(location.detailedCollectionName)
     self.recent_collection = MongoDB(location.recentCollectionName)
     self.start_urls = [self.next_url()]
     self.proxy = ProxyProvider.provide()
     super().__init__(name=self.name)
     self.logger.info(f"DetailedItemsSpider initialized")
Example #11
    def __init__(self):
        self.PORT = 9999
        self.BUFSIZE = 256
        self.server = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.server.bind(('', self.PORT))
        print('[Server] Pengsoo Server Ready!')

        self.device = Device()
        self.db = MongoDB()
        self.mpu = mpu6050()

        self.targetAddr = ''
Example #12
def index():
    message = None
    db = MongoDB(app)
    if request.method == 'POST':
        sake_data = check_request_set(request)
        if not sake_data:
            message = 'Error'
        else:
            db.set_sake(sake_data)
            message = 'Success!'

    return render_template('index.html', message=message)
Example #13
def regist():
    message = None
    db = MongoDB(app)
    sake_list = []
    if request.method == 'POST':
        sake_name = check_request_get(request)
        if not sake_name:
            message = 'Error'
        else:
            sake_list = db.get_sake(sake_name)
            message = '%s people found' % len(sake_list)

    return render_template('search.html', message=message, sake_list=sake_list)
Example #14
    def __init__(self, collector, tab_images):
        super(ImagePornControl, self).__init__()

        self._collector = collector
        self._tab_images = tab_images

        self._deal_image_count = int(
            tools.get_conf_value('config.conf', "image_porn",
                                 "deal_image_count"))
        self._interval = int(
            tools.get_conf_value('config.conf', "image_porn", "sleep_time"))

        self._db = MongoDB()
        self._image_porn_recg = ImagePornRecg()
Example #15
def action():

    is_empty, logs = mysql_operator.get_last_quotas()

    if is_empty:
        write_log("Log is Empty.")
        return

    mongo_operator = MongoDB(user, password, host, port, database)
    for row in logs:
        mongo_operator.save(row['resource'], row['in_use'], row['created'],
                            row['project_id'])

    for row in logs:
        is_saved = mongo_operator.check(row['resource'], row['project_id'],
                                        row['created'], row['in_use'])

        if not is_saved:
            mongo_operator.save(row['resource'], row['in_use'], row['created'],
                                row['project_id'])

        is_saved = mongo_operator.check(row['resource'], row['project_id'],
                                        row['created'], row['in_use'])
        if not is_saved:
            write_log("resource:" + str(row['resource']) + " project_id:" +
                      str(row['project_id']) + " created:" +
                      str(row['created']) + " in_use:" + str(row['in_use']) +
                      " write failed.")

    mysql_operator.clear_old_quotas()
Example #16
    def __init__(self, category):
        """
        :param category: The category you are searching for, e.g. flats /heels
        :return: None
        Define the category, base url and a variable to store the
        links to all pages.
        """
        self.category = category.lower()
        self.company = 'barneys'
        self.base_url = 'http://www.barneys.com/barneys-new-york/women/shoes/'

        self.all_links = []

        self.params = {'start': 0}
        self.mongo = MongoDB(db_name='shoes', table_name=self.category)
Example #17
    def __init__(self, category):
        """
        :param category: The category you are searching for, e.g. flats /heels
        :return: None
        Define the category, base url and a variable to store
        the links to all pages.
        """
        self.category = category.lower()
        self.company = 'saks'
        self.base_url = 'http://www.saksfifthavenue.com/Shoes/'

        self.all_links = []

        self.params = {'Nao': 0}
        self.mongo = MongoDB(db_name='shoes', table_name=self.category)
Example #18
    def __init__(self, category):
        """
        :param category: The category you are searching for, e.g. flats /heels
        :return: None
        Define the category, base url and a variable to
        store the links to all pages.
        """
        self.category = category.lower()
        self.company = 'nordstrom'
        self.base_url = 'http://shop.nordstrom.com/c/'

        self.all_links = []

        self.params = {'page': 1}
        self.mongo = MongoDB(db_name='shoes', table_name=self.category)
Example #19
    def _compute_idf(self):
        """
        Compute idf
        :return: 
        """
        temp_dict = {}

        self._mongo_session = MongoDB()
        self._mongo_session.connect(host="localhost",
                                    port=27017,
                                    database="crawler",
                                    collection="tf_dict")
        db_tf_results = self._mongo_session.select({})

        for result in db_tf_results:

            for _file, words_dict in result.items():

                if _file not in temp_dict:
                    temp_dict[_file] = {}

                doc_norm = 0

                for word, tf in words_dict.items():

                    if word in self._reverse_index:
                        # idf = log10((N + 0.1) / df); the parentheses matter so the
                        # whole numerator is divided by the document frequency
                        idf = math.log(
                            (self._number_of_docs + 0.1) /
                            float(len(self._reverse_index[word])), 10)
                        idf = float("{0:.6f}".format(idf))

                        doc_norm += math.pow(tf * idf, 2)

                        temp_dict[_file][word] = {
                            "tf": tf,
                            "idf": idf,
                            "doc": float("{0:.6f}".format(tf * idf))
                        }

                temp_dict[_file]['|doc|'] = float("{0:.6f}".format(
                    math.sqrt(doc_norm)))

        self._mongo_session.connect(host="localhost",
                                    port=27017,
                                    database="crawler",
                                    collection="tf_idf_dict")
        self._mongo_session.insert_document(temp_dict, "tf_idf_dict")
        self._mongo_session.disconnect()
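The idf computed above follows log10((N + 0.1) / df(w)), where N is self._number_of_docs and df(w) is the posting-list length in self._reverse_index; the 0.1 looks like a small smoothing constant. A quick sanity check with made-up numbers:

import math

N, df = 1000, 10                    # hypothetical corpus size and document frequency
idf = math.log((N + 0.1) / df, 10)  # same shape as the expression in _compute_idf
print(round(idf, 6))                # ~2.000043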
Example #20
def deleteJob(job_id):
    mDB = MongoDB()
    Col = mDB.DB["Jobs"]
    Col.delete_one({"_id": ObjectId(job_id)})

    x = Scheduler()
    x.deleteJob(job_id)
Example #21
def create_app():
    app = Flask(__name__)
    db = MongoDB()

    @app.route('/')
    def index():
        items = db.get_items()
        item_view_model = ViewModel(items)
        return render_template('index.html', view_model=item_view_model)

    @app.route('/', methods=['POST'])
    def add_item():
        title = request.form['item_title']
        db.add_item(title)
        return redirect(url_for('index'))

    @app.route('/items/<id>', methods=['POST'])
    def mark_item_as_complete(id):
        db.mark_as_complete(id)
        return redirect(url_for('index'))

    @app.route('/items/delete/<id>', methods=['POST'])
    def delete_item(id):
        db.remove_item(id)
        return redirect(url_for('index'))

    return app
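A typical way to serve the factory above during development (a sketch; the project may use flask run, gunicorn, or another entry point):

if __name__ == "__main__":
    # Build the app via the factory and run the development server.
    create_app().run(debug=True)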
Example #22
def write_db(collection_name, db_name, entity_generator):
    """Used to write the entity name and the frequency of the entities to a MongoDB database"""
    mongodb = MongoDB(db_name=db_name)
    col = mongodb.db.get_collection(collection_name)

    for entity_list in entity_generator:
        entities = Counter(list(entity_list))
        item = list({
            'entity': i[0],
            'f': i[1]
        } for i in entities.most_common())

        for value in item:
            if value['entity']:
                new_entity = search_acronyms(value['entity'])
                new_id = sub(r'[\s-]+', '', str(new_entity).lower())
                # Appending entities to the MongoDB
                result = col.find_one({'_id': new_id})
                print '.',
                if result:
                    col.update_one({'_id': new_id}, {
                        '$set': {
                            'entity': __compare(new_entity, result['entity']),
                            'f': value['f'] + result['f'],
                        }
                    },
                                   upsert=False)
                else:
                    col.insert_one({
                        '_id': new_id,
                        'entity': new_entity,
                        'f': value['f'],
                    })
        print
    print 'Process completed successfully!!!'
Example #23
async def generating(secret: str, code_phrase: str) -> dict:
    """
    Processes request and makes a query
    to the database for a record.

    Example:
    /generate?secret=super_secret_message&code_phrase=code_password

    :param secret: secret message
    :type secret: str
    :param code_phrase: for access control
    :type code_phrase: str
    :return: response with secret_id
    :rtype: dict
    """
    db = MongoDB()
    return {"secret_id": db.create_secret(secret, code_phrase)}
Example #24
async def geting(secret_id: str, code_phrase: str) -> dict:
    """
    Processes the request and makes a query
    to the database for reading.

    Example:
    /secrets/5eb82d06b893f7227b4f73ff?code_phrase=code_password

    :param secret_id:
    :type secret_id: str
    :param code_phrase:
    :type code_phrase: str
    :return: decrypted secret or an error
    :rtype: dict
    """
    db = MongoDB()
    return {"secret": db.get_secret(secret_id, code_phrase)}
Example #25
 def __init__(self, *args, **kwargs):
     self.name = kwargs.get("location_name") + "_recent"
     delta_timestamp = datetime.now() - timedelta(minutes=3)
     self.last_stamp = int(datetime.timestamp(delta_timestamp))
     self.page = 1
     self.location = location = LocationManager().get_location(kwargs.get("location_name"))
     self.recent_collection = MongoDB(location.recentCollectionName)
     self.detailed_collection = MongoDB(location.detailedCollectionName)
     self.url_pattern = 'https://m.avito.ru/api/9/items?key={key}&sort={sort}&locationId={location_id}&page=__page__&lastStamp=__timestamp__&display={display}&limit={limit}'.format(
         key=API_KEY,
         sort='date',
         location_id=location.id,
         display='list',
         limit=99)
     self.start_urls = [self.next_url()]
     self.proxy = ProxyProvider.provide()
     super().__init__(name=self.name)
Example #26
 def setUp(self):
     test_config = TestConfig()
     self.data_path = test_config.get_data_path()
     config = Configuration(os.path.join(self.data_path,'build.ini'))
     db_name = config.get('mongodb1','db_name')
     host = config.get('mongodb1','host')
     config = MongoDBConfig(db_name, host)
     self.db = MongoDB(config)
Example #27
 def __WriteDict(dic, collection, FieldK, FieldV):
     db = MongoDB.getConnection('mining')
     docs = []
     for k,v in dic.iteritems():
         doc = {}
         doc[FieldK] = k
         doc[FieldV] = v
         docs.append(doc)
     db[collection].insert(docs)
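__WriteDict is Python 2 (dict.iteritems) and uses the legacy pymongo insert(); a Python 3 sketch with the current driver API, assuming the same MongoDB.getConnection helper returns a pymongo database handle:

def write_dict(dic, collection, field_k, field_v):
    # Build one document per (key, value) pair and bulk-insert them.
    db = MongoDB.getConnection('mining')
    docs = [{field_k: k, field_v: v} for k, v in dic.items()]
    if docs:
        db[collection].insert_many(docs)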
Example #28
 def __init__(self):
     conf = Configuration()
     self.ptext = TextProcess(conf)
     self.ds = DataSet(conf)
     self.mongo = MongoDB(self.ds.db,self.ds.collection)
     self.tweet=""
     self.tokens = ""
     self.i = 0
     self.enable_translation = self.ptext.translation
     self.translation_store = self.ptext.translation_store
Example #29
def listJob():

    mDB = MongoDB()

    Col = mDB.DB["Jobs"]
    jobs = Col.find()

    resp = json.loads(dumps(jobs))

    return resp, 200
Example #30
def test_app():
    load_dotenv(override=True)

    # Point the app at a dedicated test collection
    os.environ['COLLECTION_NAME'] = 'test-todos'

    # construct the new application
    application = app.create_app()

    # start the app in its own thread.
    thread = Thread(target=lambda: application.run(use_reloader=False))
    thread.daemon = True
    thread.start()
    yield application

    # Tear Down
    thread.join(1)
    mongodb = MongoDB()
    mongodb.get_collection().drop()
Example #31
 def __ReadDict(dic, collection, FieldK, FieldV):
     db = MongoDB.getConnection('mining')
     for doc in db[collection].find():
         k = doc[FieldK]
         v = doc[FieldV]
         if (FieldK == "term"):
             k = k.encode('utf-8')
         if (FieldV == "term"):
             v = v.encode('utf-8')
         dic[k] = v
Example #32
    def __init__(self, search_term):
        """
        :param search_term: The term you search for, e.g. flats / pumps
        :return: None

        Define the search term, base url and a variable to store the links to
        all the pages related to the search term
        """
        self.search_term = search_term
        self.company = 'saks'
        self.params = {'SearchString': self.search_term, 'Nao': 0}
        self.base_url = 'http://www.saksfifthavenue.com/search/EndecaSearch.jsp?\
                         bmForm=endeca_search_form_one&bmFormID=kKYnHcK&bmUID=kKYnHcL&bmIsForm=true\
                         &bmPrevTemplate=%2Fmain%2FSectionPage.jsp&bmText=SearchString&submit-search=\
                         &bmSingle=N_Dim&N_Dim=0&bmHidden=Ntk&Ntk=Entire+Site&bmHidden=Ntx\
                         &Ntx=mode%2Bmatchpartialmax&bmHidden=prp8&prp8=t15&bmHidden=prp13&prp13=\
                         &bmHidden=sid&sid=14BBCA598131&bmHidden=FOLDER%3C%3Efolder_id&FOLDER%3C%3Efolder_id='
        self.base_url = self.base_url.replace(' ', '')
        self.all_links = []
        self.mongo = MongoDB(db_name='shoe', table_name=search_term)
Example #33
 def __init__(self, config, nodeName, loadFromDB = False):
     self.node = config.GetChild(nodeName)
     self.trained = loadFromDB
     GlobalInfo.Init(config, "__global__", loadFromDB)
     #get data source
     data_source = self.node.GetChild("data_source")
     dbname = data_source.GetChild('db').GetValue()
     self.collection = data_source.GetChild('collection').GetValue()
     self.field = data_source.GetChild('field').GetValue()
     #self.field = self.field.encode('utf-8')
     self.db = MongoDB.getConnection(dbname)
Example #34
def insertBookIntoDB(bookPath, bookInfo, bookTitle, toc, indexdict, logger):
    
    bookdict = {}

    #item['author'] = item['author'].encode('utf-8', 'strict')

    bookdict['title'] = bookTitle.encode('utf-8', 'ignore')
    bookdict['path'] = bookPath['pdf']
    bookdict['name']= bookPath['name']
    bookdict['toc'] = toc
    bookdict['indexkeywords'] = indexdict
    #bookdict['bookinfo'] = bookInfo

    dbInstance = MongoDB()
    result, id = dbInstance.insertABook(GLOBAL_CONSTANTS.CollectionName, bookdict)
    
    print "Insert book status : ", result, GLOBAL_CONSTANTS.CollectionName

    logger.writeLine("Book inserted into the database : " + str(result) + " " + bookdict['name'] + " " + GLOBAL_CONSTANTS.CollectionName)
    
    return
Example #35
    def check_schedules():
        print (schedulecheck.updater)
        db = MongoDB('heroku_mqncqpgt', 'reminders')

        # Get all reminders today.
        current_date = datetime.now().date().strftime('%d%m%Y')
        query = { "date" : current_date }

        # Get current time
        current_time = datetime.now().strftime("%H%M")

        # Loop through all reminders today to check for the time
        for element in db.finddb(query):

            # separate the date str
            # day = element["date"][0:2]
            # month = element["date"][2:4]
            # year = element["date"][4:8]

            # separate the time str
            hour = int(element["time"][0:2])
            minute = int(element["time"][2:4])

            # Convert hour and minute into datetime
            remindertime = datetime.now().replace(hour=hour, minute=minute).strftime("%H%M")

            # If remindertime is at or before the current time, send the reminder and delete it from the DB
            if remindertime <= current_time:
                chatid = element["chatid"]

                messagestr = "Reminder: {0}".format(element["description"])

                schedulecheck.updater.bot.send_message(chatid, messagestr)

                # Convert object id str into ObjectID
                objectid = ObjectId(str(element["_id"]))

                # Delete from db
                query = { "_id" : objectid }
                db.deleteonedb(query)
Example #36
class TweetDB():
    def __init__(self):
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.mongo = MongoDB(self.ds.db,self.ds.collection)
        self.tweet=""
        self.tokens = ""
        self.i = 0
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        where = {
                    "text":{"$exists":"true"},
                    "geo.coordinates":{"$exists":"true"}
                }
        select = {"text":1,"source":1,"geo":1, "user":1,"retweet_count":1,"created_at":1}
        results = self.mongo.find(where,select)
        return results

    def process_tweets(self):
        tweets = self.get_tweet_from_db()
        for rawTweet in tweets:
            if "text" in rawTweet:
                tokens = {}
                self.ptext.set_tweet_text(rawTweet['text'])
                self.ptext.set_tweet_source(rawTweet['source'])
                self.ptext.process_text()
                rawTweet['source'] = self.ptext.get_tweet_source()
                rawTweet['text'] = self.ptext.get_tweet_text()
                self.tokens = self.ptext.get_tweet_tokens()
                tokens['tokens'] = self.tokens
                rawTweet.update(tokens)
                self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

                if not self.ptext.get_translate_status():
                    self.ds.output_tweet(self.tweet)
                    self.i +=  1
                else:
                    if self.translation_store:
                        if self.enable_translation:
                            if not self.ptext.get_translate_failed():
                                self.ds.output_tweet(self.tweet)
                                self.i +=  1
                        else:
                            self.ds.output_tweet(self.tweet)
                            self.i +=  1


    def get_tweet_count(self):
        return self.i
Example #37
 def __init__(self, config, nodeName, loadFromFile = False):
     self.curNode = config.GetChild(nodeName)
     self.rate = float(self.curNode.GetChild("rate").GetValue())
     self.method = self.curNode.GetChild("method").GetValue()
     self.modelPath = self.curNode.GetChild("model_path").GetValue()
     self.people_tag_collection = self.curNode.GetChild('people_tag').GetValue()
     self.blackList = {}
     dbname = self.curNode.GetChild("db").GetValue()
     self.db = MongoDB.getConnection(dbname)
     self.trained = loadFromFile
     if (loadFromFile):
         f = open(self.modelPath, "r")
         for line in f:
             self.blackList[int(line)] = 1
Example #38
class MongoDBUT(unittest.TestCase):
    def setUp(self):
        test_config = TestConfig()
        self.data_path = test_config.get_data_path()
        config = Configuration(os.path.join(self.data_path,'build.ini'))
        db_name = config.get('mongodb1','db_name')
        host = config.get('mongodb1','host')
        config = MongoDBConfig(db_name, host)
        self.db = MongoDB(config)
        
    def testInsert(self):
        hobby = ['AA','BB','CC']
        p1 = People('dustin',34,hobby)
        object_id = self.db.insert(p1.__dict__, 'people')
#        print object_id
        people = self.db.findOne({"_id":object_id},'people')
#        print people
        self.assertEquals(34, people['_age'], 'age should be 34')
        self.assertEquals(['AA','BB','CC'], people['_hobby'], '_hobby should be AA,BB,CC')
        
    def tearDown(self):
        "Delete seed data from testing database"
        self.db.removeAll('people')
Example #39
def quotas_usage(request, project_id, resource):
    token = request.META.get('HTTP_X_AUTH_TOKEN')

    mongodb_info = setting.mongodb_info
    host = mongodb_info['host']
    user = mongodb_info['user']
    password = mongodb_info['password']
    port = mongodb_info['port']
    database = mongodb_info['database']

    m = MongoDB(user, password, host, port, database)
    
    v = Verify()
    v.set_request(KEY_STONE_HOST['host'], KEY_STONE_HOST['port'])
    v.set_tenantname(project_id)
    
    if v.is_token_available(token):
        start_time = int(request.GET.get('start_time'))
        end_time = int(request.GET.get('end_time'))
        response = m.load(resource, project_id, start_time, end_time)
        response_json = json.dumps(response)
        return HttpResponse(response_json, content_type="application/json")
    else:
        return HttpResponse(v.get_request_data())
Example #40
def action():
    
    is_empty, logs = mysql_operator.get_last_quotas()

    if is_empty:
        write_log("Log is Empty.")
        return

    mongo_operator = MongoDB(user, password, host, port, database)
    for row in logs:
        mongo_operator.save(
            row['resource'], row['in_use'],
            row['created'], row['project_id'])

    for row in logs:
        is_saved = mongo_operator.check(
            row['resource'], row['project_id'],
            row['created'], row['in_use'])

        if not is_saved:
            mongo_operator.save(
               row['resource'], row['in_use'],
               row['created'], row['project_id'])
        
        is_saved = mongo_operator.check(
            row['resource'], row['project_id'],
            row['created'], row['in_use'])
        if not is_saved:
            write_log(
                "resource:" + str(row['resource']) +
                " project_id:" + str(row['project_id']) +
                " created:" + str(row['created']) +
                " in_use:" + str(row['in_use']) +
                " write failed." )

    mysql_operator.clear_old_quotas()
Example #41
        'count': 'number of synapses to generate',
    }

    parser = argparse.ArgumentParser(help['mon_gen'])
    parser.add_argument('count', type=int, help=help['count'])
    parser.add_argument('-p', type=int, default=27017, help=help['port'])

    # Return parsed dictionary
    return vars(parser.parse_args())

if __name__ == '__main__':

    # Get argument dictionary
    argd = parse_args()
    # Make the database
    db = MongoDB(argd['p'])
    # Clear database
    db.reset()
    print('cleared database')

    # Parse the entries
    t0 = time.time()
    all_entries = generate_synapses(argd['count'])
    t1 = time.time()
    print('generated synapses in {:.2f} sec'.format(t1-t0))

    # add all entries
    db.add_points(all_entries)
    t2 = time.time()
    print('wrote db in {:.2f} sec'.format(t2-t1))
Example #42
    def __init__(self, conf):
        self.db_engine = conf.database.engine
        self.db = conf.database.db
        self.collection = conf.database.collection
        
        self.counter = 0
        self.location = ""
        self.geo = conf.geo.geo
        self.writefile = conf.output.write
        self.geowrite = conf.geo.write
        self.userwrite = conf.user.write
        self.tweetwrite = conf.tweet.write
        self.wordswrite = conf.words.write

        if self.geo:
            self.geoEngine = conf.geo.engine
            if self.geoEngine == "google":
                self.googleLimit = conf.geo.limit
                
        self.usetime = conf.output.filenamewithdate
        self.outdir = conf.output.directory

        if self.outdir:
            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir)
                
        self.tweetfilename = conf.tweet.filename
        self.geofilename = conf.geo.filename
        self.userfilename = conf.user.filename
        self.wordsfilename = conf.words.filename
        
        if self.usetime:
            time = datetime.now()
            time = time.strftime("%Y-%m-%d")
            self.outdir += "%s/" % (time)

            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir)
                 
        self.tweetfields = conf.tweet.fields.split(",")
        self.userfields = conf.user.fields.split(",")
        
        self.format = conf.output.format

        self.out_tweets_file = "%s%s.%s" % (self.outdir,self.tweetfilename,self.format)
        self.out_geo_file = "%s%s.%s" % (self.outdir,self.geofilename,self.format)
        self.out_user_file = "%s%s.%s" % (self.outdir,self.userfilename,self.format)
        self.out_words_file = "%s%s.%s" % (self.outdir,self.wordsfilename,self.format)

        self.store = conf.database.store
        if self.store:
            try:
                if self.db_engine == "mongo":
                    self.mongo = MongoDB(self.db,self.collection)
                else:
                    print "Currently only MongoDB supported for storing tweets. \nContact [email protected] for additional support"
                    print "==================================================\n"
                    self.mongo = MongoDB(self.db,self.collection)
            except:
                print "Couldn't find database driver. Storing option disabled"
                print "==================================================\n"
                self.store = False
                pass
Example #43
# RSSReader().read_rss('http://glavnoe.ua/rss/newsall.xml','glavnoe.ua')
# RSSReader().read_rss('http://glavcom.ua/rss.xml','glavcom.ua')
# RSSReader().read_rss('http://www.unn.com.ua/rss/news_uk.xml','unn.com.ua')
# RSSReader().read_rss('http://joinfo.ua/rss/main.xml','joinfo.ua')
# RSSReader().read_rss('http://focus.ua/modules/rss.php','focus.ua')
# RSSReader().read_rss('http://comments.ua/export/rss_ru.xml','comments.ua')

data = RSSReader().read_rss('http://focus.ua/modules/rss.php','focus.ua')
# print '\n\ndata_received!!!\n\n'
# for i in data:
# 	# print "Original:".encode('utf8') + str(i[2].encode('utf8')) 
# 	print ExtraMethods().remove_tags(i[2].encode('utf8'))
# 	print '\n\n'



database = MongoDB()
for item in data:
	db_query = database.makeArticleInfoQuery(item)
	if database.verifyQueryIsReady(db_query):
		database.writeArticleInfo(db_query)
	else:
		print 'Error item#' + str(data.index(item))

database.printDB()



#need to resolve decode issue http://galinfo.com.ua/rss/export.rss

Example #44
class DataSet():
    def __init__(self, conf):
        self.db_engine = conf.database.engine
        self.db = conf.database.db
        self.collection = conf.database.collection
        
        self.counter = 0
        self.location = ""
        self.geo = conf.geo.geo
        self.writefile = conf.output.write
        self.geowrite = conf.geo.write
        self.userwrite = conf.user.write
        self.tweetwrite = conf.tweet.write
        self.wordswrite = conf.words.write

        if self.geo:
            self.geoEngine = conf.geo.engine
            if self.geoEngine == "google":
                self.googleLimit = conf.geo.limit
                
        self.usetime = conf.output.filenamewithdate
        self.outdir = conf.output.directory

        if self.outdir:
            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir)
                
        self.tweetfilename = conf.tweet.filename
        self.geofilename = conf.geo.filename
        self.userfilename = conf.user.filename
        self.wordsfilename = conf.words.filename
        
        if self.usetime:
            time = datetime.now()
            time = time.strftime("%Y-%m-%d")
            self.outdir += "%s/" % (time)

            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir)
                 
        self.tweetfields = conf.tweet.fields.split(",")
        self.userfields = conf.user.fields.split(",")
        
        self.format = conf.output.format

        self.out_tweets_file = "%s%s.%s" % (self.outdir,self.tweetfilename,self.format)
        self.out_geo_file = "%s%s.%s" % (self.outdir,self.geofilename,self.format)
        self.out_user_file = "%s%s.%s" % (self.outdir,self.userfilename,self.format)
        self.out_words_file = "%s%s.%s" % (self.outdir,self.wordsfilename,self.format)

        self.store = conf.database.store
        if self.store:
            try:
                if self.db_engine == "mongo":
                    self.mongo = MongoDB(self.db,self.collection)
                else:
                    print "Currently only MongoDB supported for storing tweets. \nContact [email protected] for additional support"
                    print "==================================================\n"
                    self.mongo = MongoDB(self.db,self.collection)
            except:
                print "Couldn't find database driver. Storing option disabled"
                print "==================================================\n"
                self.store = False
                pass
                
    def output_tweet(self,tweet):
        if self.writefile:
            self.output_tweets_in_file(tweet)
            
        if self.store:
            self.output_db(tweet)
         
    def output_tweets_in_file(self,result):
        keywords=""
        points = ""
        # default user/tweet to empty so the checks below still work when
        # user or tweet writing is disabled
        user = ""
        tweet = ""
        words = True
        uid = result['user']['id']
        if self.geo:
            if self.geowrite:
                if not type(result["geo"]).__name__  == 'NoneType':
                    lat=0.00
                    long=0.00

                    while len(result["geo"]["coordinates"]) != 0:
                        if len(result["geo"]["coordinates"]) == 2:
                            lat = str(result["geo"]["coordinates"].pop(0))
                        else:
                            long = str(result["geo"]["coordinates"].pop(0))
                    points = "%s %s" % (lat,long)
        
        if self.userwrite:
            user = ""
            if self.userfields:
                i = 0
                ttl = len(self.userfields)
                for field in self.userfields:
                    if field == 'screenname' or field== 'name':
                        user += "\"%s\"" % result["user"][field].encode("UTF-8")
                    elif field== 'description' or field == 'time_zone':
                        user += "\"%s\"" % utils.clean(result["user"][field].encode("ASCII","ignore"))
                    elif field =='created_at':
                        created = result["user"][field].encode("UTF-8")
                        date = parse(created)
                        created = date.strftime("%Y-%m-%d %H:%M:%S")
                        user += "\"%s\"" % created
                    elif not field == 'id':
                        if result["user"][field] != "":
                            user += "%s" % result["user"][field]
                        else:
                            user += "0" 
                    if i < ttl:
                        user += ","
                    i += 1

        if self.tweetwrite:
            tweet = ""
            if self.tweetfields:
                i = 0
                ttl = len(self.tweetfields)
                for field in self.tweetfields:
                    if field == 'source':
                        source = utils.parse_alink(result[field])
                        tweet += "\"%s\"" % source.encode("UTF-8")
                    elif field == 'created_at':
                        created = result[field].encode("UTF-8")
                        date = parse(created)
                        created = date.strftime("%Y-%m-%d %H:%M:%S")
                        tweet += "\"%s\"" % created
                    elif field == 'tokens':
                        # iterate over a sorted copy of the tokens
                        for token in sorted(result[field]):
                            keywords += token.lower()+" "
                        keywords = keywords.rstrip().encode("UTF-8")
                        if keywords == "":
                            words = False
                        tweet += "\"%s\"" % keywords
                    elif field == 'text':
                        text = utils.clean(result[field])
                        tweet += "\"%s\"" % text.encode("UTF-8")
                    elif field == 'retweet_count':
                        tweet += "\"%s\"" % str(result[field]).encode("UTF-8").replace("+","")
                        
                    i += 1
                    if i < ttl:
                        tweet += ","
        
        if points != "":
            if keywords == "":
                field = "tokens"
                for token in sorted(result[field]):
                    keywords += token.lower()+" "
                keywords = keywords.rstrip().encode("UTF-8")
            geo_data = "%s,\"%s\",\"%s\"" % (uid,points,keywords)
            self.output_data_file(self.out_geo_file, geo_data)
        
        if user != "":
            user_data = "%s,%s" % (uid,user)
            self.output_data_file(self.out_user_file, user_data)
            
        if tweet != "":
            tweets_data = "%s,%s" % (uid,tweet)
            self.output_data_file(self.out_tweets_file, tweets_data)
            
        if self.wordswrite:
            if words:
                if keywords == "":
                    field = "tokens"
                    for token in sorted(result[field]):
                        keywords += token.lower()+" "
                    keywords = keywords.rstrip().encode("UTF-8")
                words_data = "\"%s\"" % keywords
                self.output_data_file(self.out_words_file,words_data)
             
    def output_data_file(self,filename,data):
        if self.format == 'arff':
            #self.output_arff(filename, data)
            print "arff output currently not supported"
        if self.format == 'txt':
            self.output_txt(filename, data)
            
    def output_arff(self,filename,data):        
        if not os.path.exists(filename):
            header = '''
% Title: TweetStream Dataset
%
% Sources:
%      (a) Creator: M. Fazle Taher
%      (b) Email: [email protected]
%
@relation tweet-stream

@attribute tweet string
@attribute source string

@data
'''
            with open(filename,"w") as fp:
                fp.write(header)
                fp.close()

        with open(filename,"ab") as fp:
            fp.writelines("%s\n" % data.strip())
            fp.close()
            
    def output_txt(self,filename,data):
        with open(filename,"a") as fp:
            fp.writelines("%s\n" % (data))
            fp.close()

    def output_db(self,tweet):
        if self.store:
            self.mongo.insert(tweet)

    def get_region_country_from_points(self,latitude,longitude):
        if self.counter < self.googleLimit:
            gcoder = Geocoder()
            results = gcoder.reverse_geocode(latitude,longitude)
            region = results.administrative_area_level_1__short_name
            country = results.country
            self.counter = self.counter + 1
            self.location="%s %s" %(region,country)
Example #45
def makeTotalSearch():
	data_base = MongoDB()
	return data_base.findElements('article_info')
Example #46
def makeSearchByWord(word):
	data_base = MongoDB()
	return data_base.findElemetByWord('article_info',word)
Example #47
 def __init__(self):
     MongoDB.__init__(self)
Example #48
    help = {
        'mon_key': 'read single value from mongodb',
        'key': 'integer synapse id to read',
        'port': 'port for the database',
    }

    parser = argparse.ArgumentParser(help['mon_key'])
    parser.add_argument('key', type=int, help=help['key'])
    parser.add_argument('-p', type=int, default=27017, help=help['port'])

    # Return parsed dictionary
    return vars(parser.parse_args())

if __name__ == '__main__':

    # Get argument dictionary
    argd = parse_args()
    # Make the database
    key = argd['key']
    db = MongoDB(argd['p'])
    # Start timing mongo lookup
    t0 = time.time()
    # Read the value from mongo db
    in_bounds = db.check_key(key)
    t1 = time.time()
    # Print time taken to check bounds
    print("""{}

    in {:.2f} seconds
    """.format(in_bounds, t1-t0))
Example #49
            doc = doc.decode("gbk").encode("utf-8")
        except:
            page_id  += process_num
            continue
        page_id  += process_num
        soup = BeautifulSoup(doc)
        word = soup.find('h1', "title")
        if word:
            #baike.append({'title':word.string, 'url':url, 'html':doc})
            #if not db.word_dic.find_one({'word':word.string}):
            words.append({'word':word.string, 'len':len(word.string)})
        matchs = soup.findAll(href=re.compile('^/view/\d+.htm'))
        for match in matchs:
            #if match.string:
            if match.string and not db.word_dic.find_one({'word':match.string}):
                words.append({'word':match.string, 'len':len(match.string)})
        if len(words) >= 10:
            db.word_dic.insert(words)
            words = []
            #db.baike.insert(baike)
            #baike = []

if __name__=="__main__":
    db = MongoDB.getConnection('mining')
    process_num = 1
    startindex = 1
    for i in range(startindex, process_num+startindex):
        p = Process(target=son,args=(process_num, i, db))
        p.start()

Example #50
#!/usr/bin/env python
#-*- coding:utf-8 -*-

from mongodb import MongoDB
MongoDB.connectDB('local')
MongoDB.connectDB('mining')
MongoDB.connectDB('recommend')
Example #51
# Finally, let's store all documents into Mongodb

from question import Question
from answer import Answer
from mongodb import MongoDB
from saveImage import save_img, save_text
from urllib import request, error

q = Question("https://www.zhihu.com/question/39547745")
q.setting()
answer_list = q.get_answer_id_list()
#q_info = 'Title : '+q.title+'\nQuestion url : '+q.qurl+'\nQuestion id : '+\
#        str(q.qid)+'\nAsker : '+q.asker+'\nNumber of Follows : '+str(q.num_of_follows)+\
#        '\nNumber of Comments : '+ str(q.num_of_comments)+'\nNumber of Answers : '+str(q.num_of_answers)
#save_text('./%s/' % str(q.qid), str(q.qid)+'_info', q_info)
mongo = MongoDB(str(q.qid), False) # during initialization it connects to the MongoDB server
'''
mongo.insertData('Questions', {
        'title': q.title,
        'question_url': q.qurl,
        'question_id': q.qid,
        'asker': q.asker,
        'num_of_follows': q.num_of_follows,
        'num_of_comments': q.num_of_comments,
        'num_of_answers': q.num_of_answers,
        'answers_list': answer_list,
        })
# Insert all data
for answer_id in answer_list:
    a = Answer(answer_id)
    a.setting()