def __init__(self, *args, **kwargs):
     """Prepare the detailed-items spider for a single location.

     The ``location_name`` keyword argument selects the location; the
     spider name is that value with ``"_detailed"`` appended. Two MongoDB
     wrappers are opened using the collection names stored on the
     location object, and the first start URL is taken from
     ``self.next_url()``.
     """
     requested_name = kwargs.get("location_name")
     self.location_name = requested_name
     self.name = requested_name + "_detailed"

     place = LocationManager().get_location(requested_name)
     self.location = place
     self.document_ids_ready_for_processing = []

     # One wrapper per collection name carried by the location.
     self.detailed_collection = MongoDB(place.detailedCollectionName)
     self.recent_collection = MongoDB(place.recentCollectionName)

     self.start_urls = [self.next_url()]
     self.proxy = ProxyProvider.provide()

     super().__init__(name=self.name)
     self.logger.info("DetailedItemsSpider initialized")
    def __init__(self, tab_images):
        """Initialise the image collector for the ``tab_images`` table.

        Sets up the lock, the database wrapper and the buffer bookkeeping
        fields, reads the ``image_collector`` settings from
        ``../config.conf``, and finally resets the status of unfinished
        work in the table.

        :param tab_images: name of the table holding the image records
        """
        super(Collector, self).__init__()

        def read_setting(option):
            # All settings come from the same file and section; only the
            # option name differs. Each value is converted to int.
            return int(
                tools.get_conf_value('../config.conf', "image_collector",
                                     option))

        self._lock = threading.RLock()
        self._db = MongoDB()

        self._thread_stop = False
        self._images = []
        self._null_times = 0
        self._read_pos = self._write_pos = -1
        self._tab_images = tab_images

        self._max_size = read_setting("max_size")
        self._interval = read_setting("sleep_time")
        self._allowed_null_times = read_setting('allowed_null_times')
        self._image_count = read_setting("images_count")

        # On start-up, put tasks that were left in progress back to "to do".
        in_progress = {'image_pron_status': Constance.DOING}
        pending = {'image_pron_status': Constance.TODO}
        self._db.update(self._tab_images, in_progress, pending)
        self._db.set_ensure_index(self._tab_images, 'image_pron_status')

        self._finished_callback = None
Beispiel #3
0
def extract_and_upload_text_from_images(bucket_name, tiff_documents_list, filing_type):
    """Extract text from each TIFF document and store it.

    For every entry of ``tiff_documents_list`` the TIFF file is downloaded
    from ``bucket_name``, run through ``string_extracton_v3`` and the
    resulting string is inserted into MongoDB under the entry's document
    id; the document-tracking record is then updated. A failure on one
    document is printed and the loop continues with the next one.

    :param bucket_name: S3 bucket the TIFF files are downloaded from
    :param tiff_documents_list: iterable of mappings providing the keys
        ``"document_id"`` and ``"tiff_document_name"``
    :param filing_type: passed through to the string extraction
    """
    print("Length of mini_tiff_documents_list:", len(tiff_documents_list))

    aws_s3_sdk_controller = AwsS3SdkController()
    mini_thread_postgresql_client = PostgreSQLClient()
    mongodb_client = MongoDB()

    for json_object in tiff_documents_list:
        # Bind both names before the try block so the error report below
        # never hits an unbound name (failure on the first entry) or shows
        # the id left over from the previous entry.
        document_id = None
        tiff_document_name = None
        try:
            document_id = json_object.get("document_id")
            tiff_document_name = json_object.get("tiff_document_name")

            tiff_document = aws_s3_sdk_controller.download_specific_s3_file(
                bucket_name, tiff_document_name)
            extracted_string = string_extracton_v3.run_string_extraction(
                tiff_document, filing_type)

            mongodb_client.insert_document_into_database(document_id, extracted_string)

            mini_thread_postgresql_client.update_mysql_document_tracking(document_id)

        except Exception as error:
            # Deliberately broad: one bad document must not stop the batch.
            # Only already-captured values are printed, so the handler
            # itself cannot raise on a malformed entry.
            print("[ERROR]  Tiff File Name:", tiff_document_name)
            print("[ERROR]  Document ID:", document_id)
            print("[ERROR]  extract_and_upload_text_from_images", error)
Beispiel #4
0
def deleteJob(job_id):
    """Remove a job from the "Jobs" collection and from the scheduler.

    :param job_id: id of the job; converted to ``ObjectId`` for the
        database lookup and passed unchanged to the scheduler
    """
    jobs_collection = MongoDB().DB["Jobs"]
    jobs_collection.delete_one({"_id": ObjectId(job_id)})

    Scheduler().deleteJob(job_id)
Beispiel #5
0
def test_insertar_elemento_incorrecto():
    """Test 6: inserting a new pet fails when the connection is broken.

    The connection object is built normally and its collection is then
    cleared, so ``insertar_elemento`` is expected to raise
    ``CollectionNotFound``.
    """
    nueva_mascota = dict(
        id='1',
        nombre='Simba',
        tipo_animal='cat',
        raza='angora',
        tamanio='small',
        genero='male',
        edad='young',
        tipo_pelaje='short',
        estado='adoptable',
        ninios='no',
        gatos='yes',
        perros='no',
        ciudad='Granada',
        pais='España',
    )

    # Make the database connection invalid by dropping its collection.
    uri = os.environ.get("MONGODB_URI")
    conexion_incorrecta = MongoDB(uri, 'PetfinderBD', 'mascotas')
    conexion_incorrecta.coleccion = None

    with pytest.raises(CollectionNotFound):
        assert conexion_incorrecta.insertar_elemento(nueva_mascota)
Beispiel #6
0
def write_db(collection_name, db_name, entity_generator):
    """Write entity names and their frequencies to a MongoDB collection.

    Each list yielded by ``entity_generator`` is counted; every non-empty
    entity is normalised with ``search_acronyms`` and keyed by its
    lower-cased form with whitespace and hyphens removed. If a document
    with that id already exists its frequency is increased and its
    ``entity`` field is replaced by the result of ``__compare``;
    otherwise a new document is inserted.

    :param collection_name: name of the target collection
    :param db_name: name of the database holding the collection
    :param entity_generator: iterable yielding iterables of entity names
    """
    mongodb = MongoDB(db_name=db_name)
    col = mongodb.db.get_collection(collection_name)

    for entity_list in entity_generator:
        # Counter accepts any iterable, so no intermediate list is needed.
        for entity, frequency in Counter(entity_list).most_common():
            if not entity:
                continue

            new_entity = search_acronyms(entity)
            new_id = sub(r'[\s-]+', '', str(new_entity).lower())

            # Appending entities to the MongoDB
            result = col.find_one({'_id': new_id})
            # Progress marker; print() is used because the print
            # statement is a syntax error on Python 3.
            print('.', end=' ')
            if result:
                col.update_one(
                    {'_id': new_id},
                    {
                        '$set': {
                            'entity': __compare(new_entity, result['entity']),
                            'f': frequency + result['f'],
                        }
                    },
                    upsert=False)
            else:
                col.insert_one({
                    '_id': new_id,
                    'entity': new_entity,
                    'f': frequency,
                })
        # End the line of progress markers for this batch.
        print()
    print('Process completed successfully!!!')
Beispiel #7
0
def action():
    """Copy the latest quota rows from MySQL into MongoDB.

    Every row is saved once, then each row is checked; a row that is not
    found is saved a second time and checked again. Rows still missing
    after the retry are reported through ``write_log``. The old quotas
    are cleared in MySQL at the end. Nothing is done (apart from a log
    line) when there are no rows.
    """
    is_empty, logs = mysql_operator.get_last_quotas()
    if is_empty:
        write_log("Log is Empty.")
        return

    mongo_operator = MongoDB(user, password, host, port, database)

    def store(entry):
        # Persist one quota row.
        mongo_operator.save(entry['resource'], entry['in_use'],
                            entry['created'], entry['project_id'])

    def is_stored(entry):
        # Ask the store whether this exact row is present.
        return mongo_operator.check(entry['resource'], entry['project_id'],
                                    entry['created'], entry['in_use'])

    # First pass: write everything.
    for entry in logs:
        store(entry)

    # Second pass: verify, retry once, and report what still failed.
    for entry in logs:
        if is_stored(entry) == False:
            store(entry)

        if is_stored(entry) == False:
            write_log("".join([
                "resource:", str(entry['resource']),
                " project_id:", str(entry['project_id']),
                " created:", str(entry['created']),
                " in_use:", str(entry['in_use']),
                " write failed.",
            ]))

    mysql_operator.clear_old_quotas()
def create_app():
    """Build the Flask application and register its four item routes.

    All views share one ``MongoDB`` instance created here.

    :return: the configured Flask application
    """
    app = Flask(__name__)
    db = MongoDB()

    def back_to_index():
        # Every mutating view finishes by redirecting to the item list.
        return redirect(url_for('index'))

    @app.route('/')
    def index():
        """Render the list of items."""
        view_model = ViewModel(db.get_items())
        return render_template('index.html', view_model=view_model)

    @app.route('/', methods=['POST'])
    def add_item():
        """Create an item from the submitted ``item_title`` form field."""
        db.add_item(request.form['item_title'])
        return back_to_index()

    @app.route('/items/<id>', methods=['POST'])
    def mark_item_as_complete(id):
        """Mark the item with the given id as complete."""
        db.mark_as_complete(id)
        return back_to_index()

    @app.route('/items/delete/<id>', methods=['POST'])
    def delete_item(id):
        """Remove the item with the given id."""
        db.remove_item(id)
        return back_to_index()

    return app
Beispiel #9
0
    def api_economy(self):
        """Compute yearly GDP growth from ``self.data`` and store it.

        Reads ``self.data['records']``, keeps the GDP value per financial
        year, adds a ``GDP_Growth_<year>`` percentage (rounded to two
        decimals) for every year except the first, and inserts the result
        into the ``India_GDP`` collection as a DataFrame. Growth is
        measured against the year that precedes it in record order.

        When there are no records nothing is written and the method
        returns immediately.
        """
        print('Executing api_economy')
        gdp_india = {}
        for record in self.data['records']:
            # taking out yearly GDP value from records
            gdp_india[record['financial_year']] = {
                'GDP_in_rs_cr': int(
                    record['gross_domestic_product_in_rs_cr_at_2004_05_prices']),
            }

        if not gdp_india:
            # No records: the year list used below would never have been
            # bound (NameError previously), and there is nothing to store.
            return

        # Build the year list once, after all records have been collected.
        gdp_india_yrs = list(gdp_india)

        # calculating GDP growth on yearly basis, pairing each year with
        # the one before it
        for previous_year, year in zip(gdp_india_yrs, gdp_india_yrs[1:]):
            previous_gdp = gdp_india[previous_year]['GDP_in_rs_cr']
            current_gdp = gdp_india[year]['GDP_in_rs_cr']
            gdp_india[year]['GDP_Growth_' + year] = round(
                ((current_gdp - previous_gdp) / previous_gdp) * 100, 2)

        # convert to pandas dataframe
        gdp_india = pd.DataFrame(list(gdp_india.items()),
                                 columns=['financial_year', 'gdp_growth'])

        # connect to mongodb
        mongodb_obj = MongoDB('etluser', 'etluser', 'localhost', 'GDP')
        mongodb_obj.insert_into_db(gdp_india, 'India_GDP')
Beispiel #10
0
 def __init__(self, *args, **kwargs):
     """Prepare the recent-items spider for a single location.

     The ``location_name`` keyword argument selects the location; the
     spider name is that value with ``"_recent"`` appended. The initial
     ``last_stamp`` is the integer timestamp of three minutes before
     now, and paging starts at page 1. The URL pattern keeps the
     ``__page__`` and ``__timestamp__`` placeholders unfilled.
     """
     self.name = kwargs.get("location_name") + "_recent"

     three_minutes_ago = datetime.now() - timedelta(minutes=3)
     self.last_stamp = int(three_minutes_ago.timestamp())
     self.page = 1

     place = LocationManager().get_location(kwargs.get("location_name"))
     self.location = place
     self.recent_collection = MongoDB(place.recentCollectionName)
     self.detailed_collection = MongoDB(place.detailedCollectionName)

     url_template = 'https://m.avito.ru/api/9/items?key={key}&sort={sort}&locationId={location_id}&page=__page__&lastStamp=__timestamp__&display={display}&limit={limit}'
     self.url_pattern = url_template.format(
         key=API_KEY,
         sort='date',
         location_id=place.id,
         display='list',
         limit=99,
     )

     self.start_urls = [self.next_url()]
     self.proxy = ProxyProvider.provide()
     super().__init__(name=self.name)
Beispiel #11
0
    def time(self, update, context):
        """Handle the time step of the reminder-scheduling conversation.

        The message text must be four digits in 24-hour ``hhmm`` form and,
        combined with the previously stored day/month/year, must not lie
        in the past. On success the reminder is summarised to the user,
        written to the ``reminders`` collection and the conversation ends.

        :param update: incoming update; its message text is the user input
        :param context: callback context providing ``bot.send_message``
        :return: ``ConversationHandler.END`` when the user cancels or the
            reminder is stored, ``self.TIME`` to ask for the time again
        """
        message_time = update.message.text

        # Handles convo cancellation
        if message_time == "/cancel":
            context.bot.send_message(
                update.effective_chat.id,
                "You have stopped scheduling for a reminder.")
            return ConversationHandler.END

        # Check that the input is exactly 4 decimal digits. isdecimal() is
        # used rather than isdigit() because isdigit() also accepts
        # characters such as superscripts that int() cannot convert.
        if len(message_time) != 4 or not message_time.isdecimal():
            context.bot.send_message(
                update.effective_chat.id,
                "Please check that you have entered 4 numbers.")

            return self.TIME

        hour = message_time[0:2]
        minute = message_time[2:4]

        # Reject values such as "2561" here; otherwise datetime() below
        # raises ValueError and the handler crashes.
        if int(hour) > 23 or int(minute) > 59:
            context.bot.send_message(
                update.effective_chat.id,
                "Please enter a valid 24-hour time between 0000 and 2359.")

            return self.TIME

        # Check if date is current date or later including the time
        current_date = datetime.now()
        input_date = datetime(int(self.year_val), int(self.month_val),
                              int(self.day_val), int(hour), int(minute))

        if current_date > input_date:
            context.bot.send_message(
                update.effective_chat.id,
                "You have entered a time in the past. Please re-enter the time(24 hours)."
            )

            return self.TIME

        # store hour and minute for display
        self.hour_val = hour
        self.minute_val = minute

        # store the time in memory before writing into the database
        self.time_val = message_time

        reply_message = "Description: {0}\nDate(Day/Month/Year): {1}/{2}/{3}\nTime(hh:mm): {4}:{5}".format(
            self.description_val, self.day_val, self.month_val, self.year_val,
            self.hour_val, self.minute_val)
        context.bot.send_message(update.effective_chat.id, reply_message)

        # Init mongodb connection
        db = MongoDB('heroku_mqncqpgt', 'reminders')
        db.insertonedb({
            "chatid": update.message.chat.id,
            "description": self.description_val,
            "date": self.date_val,
            "time": self.time_val
        })

        return ConversationHandler.END
Beispiel #12
0
    def deal_item(self, data):
        """Store the rumors found in ``data["results"]``.

        Each rumor receives an ``_id`` hashed from its title and rumor
        type plus fixed ``source`` / ``agency`` values. A rumor is
        inserted only when ``url_repeat`` returns ``False`` for its id;
        after a successful insert the id is added to the filter queue.

        :param data: mapping with a ``"results"`` list of rumor dicts
        """
        entries = data["results"]
        store = MongoDB(MONGODB_URI, "rumors")

        for entry in entries:
            fingerprint = "{}{}".format(entry["title"], entry["rumorType"])
            rumor_id = generate_hash(fingerprint)
            entry.update({"_id": rumor_id, "source": "丁香园", "agency": "丁香园"})

            if self.url_repeat(rumor_id) is not False:
                continue
            if store.insert(entry):
                self.update_filter_queue(rumor_id)
Beispiel #13
0
def listJob():
    """Return every document of the "Jobs" collection.

    The cursor is serialised with ``dumps`` and parsed back with
    ``json.loads`` so the result consists of plain JSON types.

    :return: tuple of (parsed job list, 200)
    """
    cursor = MongoDB().DB["Jobs"].find()
    return json.loads(dumps(cursor)), 200
Beispiel #14
0
 def __init__(self):
     """Create the text processor, data set and MongoDB wrapper.

     All three are built from one ``Configuration`` instance; the
     translation flags are copied from the text processor.
     """
     conf = Configuration()
     text_processor = TextProcess(conf)
     data_set = DataSet(conf)

     self.ptext = text_processor
     self.ds = data_set
     self.mongo = MongoDB(data_set.db, data_set.collection)

     # Per-tweet working state, empty until processing starts.
     self.tweet = ""
     self.tokens = ""
     self.i = 0

     self.enable_translation = text_processor.translation
     self.translation_store = text_processor.translation_store
Beispiel #15
0
def fetch(page):
    """Fetch one listing page and save every store with its comments.

    The listing request returns the stores of ``page``. For each store
    all comment pages (10 comments per request) are downloaded, parsed
    and attached under the ``'comment'`` key before the store is saved.

    :param page: listing page number, also used to build the token and
        the ``originUrl`` parameter
    """
    db = MongoDB()

    uuid = get_uuid()
    token = CreatToken(page).get_token()
    listing_query = {
        'cityName': cityName,
        'cateId': type_,
        'areaId': '0',
        'sort': '',
        'dinnerCountAttrId': '',
        'page': page,
        'userId': '',
        'uuid': uuid,
        'platform': '1',
        'partner': '126',
        'originUrl': originUrl + 'pn{}/'.format(page),
        'riskLevel': '1',
        'optimusCode': '1',
        '_token': token
    }

    listing_response = requests.get(base_url,
                                    params=listing_query,
                                    headers=HEADERS)
    poi_infos = json.loads(listing_response.text)['data']['poiInfos']

    for poi in poi_infos:
        store = parse_store(poi)

        poi_id = store['poiId']
        # Comments are served ten at a time.
        page_count = math.ceil(int(store['allCommentNum']) / 10)

        comment_list = []
        for page_index in range(page_count):
            comment_query = {
                'uuid': get_uuid(),
                'id': poi_id,
                'userId': '2490983615',
                'offset': page_index * 10,
                'pageSize': '10',
            }
            comment_response = requests.get(comment_url,
                                            params=comment_query,
                                            headers=HEADERS)
            raw_comments = json.loads(
                comment_response.text)['data']['comments']
            for raw_comment in raw_comments:
                comment = parse_comment(raw_comment)
                print(comment)
                comment_list.append(comment)

        store['comment'] = comment_list
        print(store)
        db.save(store)
Beispiel #16
0
def index():
    """Render the index page, storing submitted sake data on POST.

    For a POST request the form is validated with ``check_request_set``;
    valid data is saved and the page shows ``'Success!'``, otherwise
    ``'Error'``. For other methods no message is shown.
    """
    db = MongoDB(app)

    if request.method != 'POST':
        return render_template('index.html', message=None)

    sake_data = check_request_set(request)
    if sake_data:
        db.set_sake(sake_data)
        message = 'Success!'
    else:
        message = 'Error'

    return render_template('index.html', message=message)
Beispiel #17
0
    def __init__(self):
        """Open the UDP server socket and create the helper objects.

        The socket is bound to port 9999 on all interfaces; the device,
        database and MPU-6050 helpers are instantiated afterwards. No
        target address is set yet.
        """
        self.PORT = 9999
        self.BUFSIZE = 256

        udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.server = udp_socket
        udp_socket.bind(('', self.PORT))
        print('[Server] Pengsoo Server Ready!')

        self.device = Device()
        self.db = MongoDB()
        self.mpu = mpu6050()

        self.targetAddr = ''
Beispiel #18
0
def regist():
    """Render the search page, looking up sake by name on POST.

    For a POST request the name is read with ``check_request_get``; when
    it is present the matches are fetched and their count is reported,
    otherwise the message is ``'Error'``.
    """
    db = MongoDB(app)
    message, sake_list = None, []

    if request.method == 'POST':
        sake_name = check_request_get(request)
        if sake_name:
            sake_list = db.get_sake(sake_name)
            message = '%s people found' % len(sake_list)
        else:
            message = 'Error'

    return render_template('search.html', message=message, sake_list=sake_list)
    def __init__(self, collector, tab_images):
        """Initialise the controller with its collector and image table.

        Reads ``deal_image_count`` and ``sleep_time`` from the
        ``image_porn`` section of ``config.conf`` and creates the database
        wrapper and the recogniser.

        :param collector: collector object stored for later use
        :param tab_images: name of the table holding the image records
        """
        super(ImagePornControl, self).__init__()

        self._collector = collector
        self._tab_images = tab_images

        def read_setting(option):
            # Both settings live in the same file and section.
            return int(tools.get_conf_value('config.conf', "image_porn", option))

        self._deal_image_count = read_setting("deal_image_count")
        self._interval = read_setting("sleep_time")

        self._db = MongoDB()
        self._image_porn_recg = ImagePornRecg()
Beispiel #20
0
def addJob(body):
    """Store a new job and register it with the scheduler.

    The ``dbUsername`` and ``dbPassword`` entries of ``body`` are replaced
    in place by their encrypted form before the document is inserted into
    the "Jobs" collection.

    :param body: job description; modified in place
    :return: tuple of (serialised inserted id, 200)
    """
    for credential in ('dbUsername', 'dbPassword'):
        body[credential] = crypt.encrypt(body[credential])

    jobs_collection = MongoDB().DB["Jobs"]
    jobid = jobs_collection.insert_one(body).inserted_id
    resp = json.loads(dumps(jobid))

    Scheduler().addJob(jobid)
    return resp, 200
Beispiel #21
0
    def __init__(self, category):
        """
        Set up the scraper state for one Saks shoe category.

        :param category: category to search for, e.g. flats / heels;
            stored lower-cased and used as the MongoDB table name
        :return: None
        """
        self.company = 'saks'
        self.base_url = 'http://www.saksfifthavenue.com/Shoes/'
        self.params = {'Nao': 0}

        # Filled later with the links to all result pages.
        self.all_links = []

        normalized = category.lower()
        self.category = normalized
        self.mongo = MongoDB(db_name='shoes', table_name=normalized)
    def __init__(self, category):
        """
        Set up the scraper state for one Nordstrom shoe category.

        :param category: category to search for, e.g. flats / heels;
            stored lower-cased and used as the MongoDB table name
        :return: None
        """
        self.company = 'nordstrom'
        self.base_url = 'http://shop.nordstrom.com/c/'
        self.params = {'page': 1}

        # Filled later with the links to all result pages.
        self.all_links = []

        normalized = category.lower()
        self.category = normalized
        self.mongo = MongoDB(db_name='shoes', table_name=normalized)
    def __init__(self, category):
        """
        Set up the scraper state for one Barneys shoe category.

        :param category: category to search for, e.g. flats / heels;
            stored lower-cased and used as the MongoDB table name
        :return: None
        """
        self.company = 'barneys'
        self.base_url = 'http://www.barneys.com/barneys-new-york/women/shoes/'
        self.params = {'start': 0}

        # Filled later with the links to all result pages.
        self.all_links = []

        normalized = category.lower()
        self.category = normalized
        self.mongo = MongoDB(db_name='shoes', table_name=normalized)
Beispiel #24
0
    def _compute_idf(self):
        """
        Compute tf-idf weights for every stored document.

        Reads the term-frequency documents from the ``tf_dict``
        collection, computes for each word that is present in
        ``self._reverse_index`` its idf and tf*idf weight (both rounded to
        six decimals), adds the document norm under the ``'|doc|'`` key
        and writes the result to the ``tf_idf_dict`` collection.

        :return: None
        """
        temp_dict = {}

        self._mongo_session = MongoDB()
        self._mongo_session.connect(host="localhost",
                                    port=27017,
                                    database="crawler",
                                    collection="tf_dict")
        db_tf_results = self._mongo_session.select({})

        for result in db_tf_results:

            for _file, words_dict in result.items():

                file_weights = temp_dict.setdefault(_file, {})
                doc_norm = 0

                for word, tf in words_dict.items():

                    if word not in self._reverse_index:
                        continue

                    # Smoothed idf: log10((N + 0.1) / df). The parentheses
                    # matter: without them the expression evaluates as
                    # N + (0.1 / df), which is almost the same value for
                    # every word and so carries no document-frequency
                    # information.
                    document_frequency = float(len(self._reverse_index[word]))
                    idf = math.log(
                        (self._number_of_docs + 0.1) / document_frequency,
                        10)
                    idf = float("{0:.6f}".format(idf))

                    weight = tf * idf
                    doc_norm += math.pow(weight, 2)

                    file_weights[word] = {
                        "tf": tf,
                        "idf": idf,
                        "doc": float("{0:.6f}".format(weight))
                    }

                # Euclidean norm of the document's weight vector.
                file_weights['|doc|'] = float("{0:.6f}".format(
                    math.sqrt(doc_norm)))

        # Switch the session to the output collection before writing.
        self._mongo_session.connect(host="localhost",
                                    port=27017,
                                    database="crawler",
                                    collection="tf_idf_dict")
        self._mongo_session.insert_document(temp_dict, "tf_idf_dict")
        self._mongo_session.disconnect()
Beispiel #25
0
 def __init__(self, retry: int = 3, timeout: int = 5):
     """Configure the zhima proxy source and its proxy collections.

     :param retry: forwarded to the parent initialiser
     :param timeout: forwarded to the parent initialiser
     :raises RuntimeError: when the MongoDB client has no ``local_rw``
         entry
     """
     super().__init__(retry, timeout)
     self.logger = Logger(folder="zhima")
     self.url = "http://webapi.http.zhimacangku.com/getip?num=1&type=2&pro=&city=0&yys=0&port=11&time=1&ts=1&ys=0&cs=1&lb=1&sb=0&pb=4&mr=1&regions="
     self.white = "http://web.http.cnapi.cc/index/index/save_white?neek=80313&appkey=1745838ce83ef74c512a3d200585c1b4&white="

     client = MongoDB()
     if "local_rw" not in client:
         raise RuntimeError(
             "The specified configuration item could not be found.")

     # Reader and writer both point at the same database and collection.
     self.reader = client["local_rw"]["proxies"]
     self.writer = client["local_rw"]["proxies"]
Beispiel #26
0
async def geting(secret_id: str, code_phrase: str) -> dict:
    """
    Handle a read request by looking the secret up in the database.

    Example:
    /secrets/5eb82d06b893f7227b4f73ff?code_phrase=code_password

    :param secret_id: identifier of the stored secret
    :type secret_id: str
    :param code_phrase: phrase passed to the lookup together with the id
    :type code_phrase: str
    :return: decrypted secret or an error
    :rtype: dict
    """
    secret = MongoDB().get_secret(secret_id, code_phrase)
    return {"secret": secret}
Beispiel #27
0
async def generating(secret: str, code_phrase: str) -> dict:
    """
    Handle a create request by storing the secret in the database.

    Example:
    /generate?secret=super_secret_message&code_phrase=code_password

    :param secret: secret message
    :type secret: str
    :param code_phrase: for access control
    :type code_phrase: str
    :return: response with secret_id
    :rtype: dict
    """
    secret_id = MongoDB().create_secret(secret, code_phrase)
    return {"secret_id": secret_id}
Beispiel #28
0
def clear_db_tables(host, port, database, collections):
    """
    Clear the given collections (TF_DICT, etc.) before a run.

    For every name the session connects to that collection, clears it
    with an empty filter and disconnects again.

    :param host: database host
    :param port: database port
    :param database: database name
    :param collections: A list of collections that you want to clear
    :return: None
    """
    mongo_session = MongoDB()
    # Connection settings shared by every collection.
    target = dict(host=host, port=port, database=database)

    for collection_name in collections:
        mongo_session.connect(collection=collection_name, **target)
        mongo_session.clear({}, collection_name)
        mongo_session.disconnect()
def test_app():
    """Yield a running application backed by the test collection.

    The environment is loaded, ``COLLECTION_NAME`` is pointed at
    ``test-todos`` and the app is served from a daemon thread. After the
    ``yield`` the thread is joined for up to one second and the
    collection is dropped.
    """
    load_dotenv(override=True)

    # Point the application at a dedicated test collection.
    os.environ['COLLECTION_NAME'] = 'test-todos'

    application = app.create_app()

    # Serve the app from a background daemon thread.
    server_thread = Thread(target=application.run,
                           kwargs={'use_reloader': False},
                           daemon=True)
    server_thread.start()
    yield application

    # Tear down: wait briefly for the server, then remove the test data.
    server_thread.join(1)
    MongoDB().get_collection().drop()
Beispiel #30
0
    def __init__(self, search_term):
        """
        Set up the scraper state for one Saks search term.

        :param search_term: The term you search for, e.g. flats / pumps;
            also used as the MongoDB table name
        :return: None

        The search URL is written across several lines; the indentation
        that ends up inside the literal is stripped by removing every
        space from it.
        """
        self.company = 'saks'
        self.search_term = search_term
        self.params = {'SearchString': self.search_term, 'Nao': 0}

        padded_url = 'http://www.saksfifthavenue.com/search/EndecaSearch.jsp?\
                         bmForm=endeca_search_form_one&bmFormID=kKYnHcK&bmUID=kKYnHcL&bmIsForm=true\
                         &bmPrevTemplate=%2Fmain%2FSectionPage.jsp&bmText=SearchString&submit-search=\
                         &bmSingle=N_Dim&N_Dim=0&bmHidden=Ntk&Ntk=Entire+Site&bmHidden=Ntx\
                         &Ntx=mode%2Bmatchpartialmax&bmHidden=prp8&prp8=t15&bmHidden=prp13&prp13=\
                         &bmHidden=sid&sid=14BBCA598131&bmHidden=FOLDER%3C%3Efolder_id&FOLDER%3C%3Efolder_id='
        self.base_url = padded_url.replace(' ', '')

        # Filled later with the links to all result pages.
        self.all_links = []
        self.mongo = MongoDB(db_name='shoe', table_name=search_term)