def __init__(self, *args, **kwargs):
    """Configure the detailed-items spider for a single named location."""
    loc_name = kwargs.get("location_name")
    self.location_name = loc_name
    self.name = loc_name + "_detailed"
    loc = LocationManager().get_location(loc_name)
    self.location = loc
    self.document_ids_ready_for_processing = []
    # One MongoDB collection per data tier for this location.
    self.detailed_collection = MongoDB(loc.detailedCollectionName)
    self.recent_collection = MongoDB(loc.recentCollectionName)
    self.start_urls = [self.next_url()]
    self.proxy = ProxyProvider.provide()
    super().__init__(name=self.name)
    self.logger.info(f"DetailedItemsSpider initialized")
def __init__(self, tab_images):
    """Initialise the image collector's buffer state, config and DB hooks.

    :param tab_images: name of the MongoDB table/collection holding images
    """
    super(Collector, self).__init__()
    self._lock = threading.RLock()
    self._db = MongoDB()
    self._thread_stop = False
    self._images = []
    self._null_times = 0
    self._read_pos = -1
    self._write_pos = -1
    self._tab_images = tab_images

    # All tuning knobs live in the [image_collector] section of the config.
    def conf_int(key):
        return int(tools.get_conf_value('../config.conf', "image_collector", key))

    self._max_size = conf_int("max_size")
    self._interval = conf_int("sleep_time")
    self._allowed_null_times = conf_int('allowed_null_times')
    self._image_count = conf_int("images_count")
    # On startup, reset tasks stuck in DOING back to TODO so they get retried.
    self._db.update(self._tab_images,
                    {'image_pron_status': Constance.DOING},
                    {'image_pron_status': Constance.TODO})
    self._db.set_ensure_index(self._tab_images, 'image_pron_status')
    self._finished_callback = None
def extract_and_upload_text_from_images(bucket_name, tiff_documents_list, filing_type):
    """Download each TIFF from S3, OCR/extract its text, and persist results.

    For every entry the extracted text is written to MongoDB and the
    PostgreSQL tracking table is updated.  Failures are logged per-document
    and do not stop the batch.

    :param bucket_name: S3 bucket holding the TIFF documents
    :param tiff_documents_list: list of dicts with ``document_id`` and
        ``tiff_document_name`` keys
    :param filing_type: passed through to the string-extraction routine
    """
    print("Length of mini_tiff_documents_list:", len(tiff_documents_list))
    aws_s3_sdk_controller = AwsS3SdkController()
    mini_thread_postgresql_client = PostgreSQLClient()
    mongodb_client = MongoDB()
    for json_object in tiff_documents_list:
        # BUGFIX: bind document_id before the try block so the except handler
        # can never hit an UnboundLocalError when logging an early failure.
        document_id = None
        try:
            document_id = json_object.get("document_id")
            tiff_document = aws_s3_sdk_controller.download_specific_s3_file(
                bucket_name, json_object.get("tiff_document_name"))
            extracted_string = string_extracton_v3.run_string_extraction(
                tiff_document, filing_type)
            mongodb_client.insert_document_into_database(document_id, extracted_string)
            mini_thread_postgresql_client.update_mysql_document_tracking(document_id)
        except Exception as error:
            print("[ERROR] Tiff File Name:", json_object.get("tiff_document_name"))
            print("[ERROR] Document ID:", document_id)
            print("[ERROR] extract_and_upload_text_from_images", error)
def deleteJob(job_id):
    """Remove a job document from MongoDB and unschedule it."""
    jobs = MongoDB().DB["Jobs"]
    jobs.delete_one({"_id": ObjectId(job_id)})
    # Keep the scheduler in sync with the persisted job store.
    Scheduler().deleteJob(job_id)
def test_insertar_elemento_incorrecto():
    """Test 6: inserting a new pet must fail when the connection's
    collection handle is invalid."""
    nueva_mascota = {
        'id': '1',
        'nombre': 'Simba',
        'tipo_animal': 'cat',
        'raza': 'angora',
        'tamanio': 'small',
        'genero': 'male',
        'edad': 'young',
        'tipo_pelaje': 'short',
        'estado': 'adoptable',
        'ninios': 'no',
        'gatos': 'yes',
        'perros': 'no',
        'ciudad': 'Granada',
        'pais': 'España'
    }
    # Deliberately break the connection: a missing collection must raise.
    conexion_incorrecta = MongoDB(os.environ.get("MONGODB_URI"),
                                  'PetfinderBD', 'mascotas')
    conexion_incorrecta.coleccion = None
    with pytest.raises(CollectionNotFound):
        assert conexion_incorrecta.insertar_elemento(nueva_mascota)
def write_db(collection_name, db_name, entity_generator):
    """Write entity names and their frequencies to a MongoDB collection.

    For every batch yielded by *entity_generator* the entities are counted,
    normalised (acronym expansion, an id derived by stripping whitespace and
    hyphens and lower-casing) and upserted: an existing document gets its
    frequency incremented, otherwise a new document is inserted.

    NOTE(review): this function is Python 2 (print statements below).
    """
    mongodb = MongoDB(db_name=db_name)
    col = mongodb.db.get_collection(collection_name)
    for entity_list in entity_generator:
        # Count occurrences of each entity within this batch.
        entities = Counter(list(entity_list))
        item = list({
            'entity': i[0],
            'f': i[1]
        } for i in entities.most_common())
        for value in item:
            if value['entity']:
                # Expand known acronyms, then derive a stable id by removing
                # whitespace/hyphens and lower-casing the entity text.
                new_entity = search_acronyms(value['entity'])
                new_id = sub(r'[\s-]+', '', str(new_entity).lower())
                # Appending entities to the MongoDB
                result = col.find_one({'_id': new_id})
                print '.',
                if result:
                    # Entity already stored: keep the preferred spelling (via
                    # __compare) and accumulate the frequency count.
                    col.update_one({'_id': new_id}, {
                        '$set': {
                            'entity': __compare(new_entity, result['entity']),
                            'f': value['f'] + result['f'],
                        }
                    }, upsert=False)
                else:
                    col.insert_one({
                        '_id': new_id,
                        'entity': new_entity,
                        'f': value['f'],
                    })
    print
    print 'Process completed successfully!!!'
def action():
    """Mirror the latest MySQL quota log rows into MongoDB.

    Every row is written once, then verified; a row that did not land is
    retried a single time before a failure is logged.  Old MySQL rows are
    purged afterwards.
    """
    is_empty, logs = mysql_operator.get_last_quotas()
    if is_empty:
        write_log("Log is Empty.")
        return
    mongo_operator = MongoDB(user, password, host, port, database)
    for row in logs:
        mongo_operator.save(row['resource'], row['in_use'], row['created'],
                            row['project_id'])
    for row in logs:
        is_saved = mongo_operator.check(row['resource'], row['project_id'],
                                        row['created'], row['in_use'])
        # Idiom fix: truthiness test instead of "== False".
        if not is_saved:
            # One retry before giving up on this row.
            mongo_operator.save(row['resource'], row['in_use'],
                                row['created'], row['project_id'])
            is_saved = mongo_operator.check(row['resource'], row['project_id'],
                                            row['created'], row['in_use'])
            if not is_saved:
                write_log("resource:" + str(row['resource']) +
                          " project_id:" + str(row['project_id']) +
                          " created:" + str(row['created']) +
                          " in_use:" + str(row['in_use']) + " write failed.")
    mysql_operator.clear_old_quotas()
def create_app():
    """Build the Flask to-do application with its MongoDB-backed routes."""
    app = Flask(__name__)
    db = MongoDB()

    @app.route('/')
    def index():
        # Render every stored item through the view model.
        items = db.get_items()
        return render_template('index.html', view_model=ViewModel(items))

    @app.route('/', methods=['POST'])
    def add_item():
        db.add_item(request.form['item_title'])
        return redirect(url_for('index'))

    @app.route('/items/<id>', methods=['POST'])
    def mark_item_as_complete(id):
        db.mark_as_complete(id)
        return redirect(url_for('index'))

    @app.route('/items/delete/<id>', methods=['POST'])
    def delete_item(id):
        db.remove_item(id)
        return redirect(url_for('index'))

    return app
def api_economy(self):
    """Compute year-on-year GDP growth from ``self.data`` and store it.

    Builds a per-financial-year dict of GDP values (at 2004-05 prices),
    derives percentage growth for every year after the first, converts the
    result to a pandas DataFrame and inserts it into MongoDB.
    """
    print('Executing api_economy')
    gdp_india = {}
    for record in self.data['records']:
        # Yearly GDP value keyed by financial year.
        gdp_india[record['financial_year']] = {
            'GDP_in_rs_cr': int(
                record['gross_domestic_product_in_rs_cr_at_2004_05_prices'])
        }
    gdp_india_yrs = list(gdp_india)
    # Growth is only defined from the second year onwards; iterate adjacent
    # year pairs instead of indexing with a skipped first iteration.
    for prev_year, year in zip(gdp_india_yrs, gdp_india_yrs[1:]):
        key = 'GDP_Growth_' + year
        prev = gdp_india[prev_year]['GDP_in_rs_cr']
        curr = gdp_india[year]['GDP_in_rs_cr']
        gdp_india[year][key] = round(((curr - prev) / prev) * 100, 2)
    # Convert to a pandas DataFrame for insertion.
    gdp_india = pd.DataFrame(list(gdp_india.items()),
                             columns=['financial_year', 'gdp_growth'])
    mongodb_obj = MongoDB('etluser', 'etluser', 'localhost', 'GDP')
    mongodb_obj.insert_into_db(gdp_india, 'India_GDP')
def __init__(self, *args, **kwargs):
    """Configure the recent-items spider for a single named location."""
    loc_name = kwargs.get("location_name")
    self.name = loc_name + "_recent"
    # Only look back three minutes: items newer than this timestamp.
    cutoff = datetime.now() - timedelta(minutes=3)
    self.last_stamp = int(datetime.timestamp(cutoff))
    self.page = 1
    loc = LocationManager().get_location(loc_name)
    self.location = loc
    self.recent_collection = MongoDB(loc.recentCollectionName)
    self.detailed_collection = MongoDB(loc.detailedCollectionName)
    # __page__ and __timestamp__ are placeholders substituted by next_url().
    self.url_pattern = 'https://m.avito.ru/api/9/items?key={key}&sort={sort}&locationId={location_id}&page=__page__&lastStamp=__timestamp__&display={display}&limit={limit}'.format(
        key=API_KEY, sort='date', location_id=loc.id, display='list', limit=99)
    self.start_urls = [self.next_url()]
    self.proxy = ProxyProvider.provide()
    super().__init__(name=self.name)
def time(self, update, context):
    """Conversation step: parse an HHMM time, validate it, persist reminder.

    :param update: incoming Telegram update (message text is the time)
    :param context: bot context used to send replies
    :return: ``self.TIME`` to re-ask on invalid input, otherwise
        ``ConversationHandler.END``
    """
    # Handles convo cancellation.
    if update.message.text == "/cancel":
        context.bot.send_message(
            update.effective_chat.id,
            "You have stopped scheduling for a reminder.")
        return ConversationHandler.END

    # The time must be exactly four digits (HHMM, 24-hour clock).
    message_time = update.message.text
    if len(message_time) != 4 or not message_time.isdigit():
        context.bot.send_message(
            update.effective_chat.id,
            "Please check that you have entered 4 numbers.")
        return self.TIME

    hour = message_time[0:2]
    minute = message_time[2:4]

    # BUGFIX: reject out-of-range values (e.g. "2575") here; previously the
    # datetime() call below raised an uncaught ValueError and crashed.
    if int(hour) > 23 or int(minute) > 59:
        context.bot.send_message(
            update.effective_chat.id,
            "Please check that you have entered 4 numbers.")
        return self.TIME

    # The reminder must be in the future relative to now.
    current_date = datetime.now()
    input_date = datetime(int(self.year_val), int(self.month_val),
                          int(self.day_val), int(hour), int(minute))
    if current_date > input_date:
        context.bot.send_message(
            update.effective_chat.id,
            "You have entered a time in the past. Please re-enter the time(24 hours)."
        )
        return self.TIME

    # Store hour and minute for display.
    self.hour_val = hour
    self.minute_val = minute
    # Store the time in memory before writing into the database.
    self.time_val = message_time

    reply_message = "Description: {0}\nDate(Day/Month/Year): {1}/{2}/{3}\nTime(hh:mm): {4}:{5}".format(
        self.description_val, self.day_val, self.month_val, self.year_val,
        self.hour_val, self.minute_val)
    context.bot.send_message(update.effective_chat.id, reply_message)

    # Persist the reminder.
    db = MongoDB('heroku_mqncqpgt', 'reminders')
    db.insertonedb({
        "chatid": update.message.chat.id,
        "description": self.description_val,
        "date": self.date_val,
        "time": self.time_val
    })
    return ConversationHandler.END
def deal_item(self, data):
    """Store deduplicated rumor records and track their ids."""
    mongo = MongoDB(MONGODB_URI, "rumors")
    for rumor in data["results"]:
        # Stable id derived from title + type so reposts collapse to one doc.
        rumor_id = generate_hash("{}{}".format(rumor["title"], rumor["rumorType"]))
        rumor.update({"_id": rumor_id, "source": "丁香园", "agency": "丁香园"})
        # Only remember ids that were both unseen and actually inserted.
        if self.url_repeat(rumor_id) is False and mongo.insert(rumor):
            self.update_filter_queue(rumor_id)
def listJob():
    """Return all job documents as JSON-serialisable data with HTTP 200."""
    cursor = MongoDB().DB["Jobs"].find()
    # Round-trip through BSON dumps so ObjectIds serialise cleanly.
    return json.loads(dumps(cursor)), 200
def __init__(self):
    """Wire up text processing, dataset and MongoDB access from config."""
    config = Configuration()
    self.ptext = TextProcess(config)
    self.ds = DataSet(config)
    self.mongo = MongoDB(self.ds.db, self.ds.collection)
    # Working state for the tweet currently being processed.
    self.tweet = ""
    self.tokens = ""
    self.i = 0
    # Translation behaviour is driven entirely by the text-process config.
    self.enable_translation = self.ptext.translation
    self.translation_store = self.ptext.translation_store
def _fetch_comments(poi_id, comment_count):
    """Fetch every comment page (10 per page) for one store and return them."""
    comment_list = []
    max_page = math.ceil(int(comment_count) / 10)
    for offset in range(max_page):
        params = {
            'uuid': get_uuid(),
            'id': poi_id,
            'userId': '2490983615',
            'offset': offset * 10,
            'pageSize': '10',
        }
        resp = requests.get(comment_url, params=params, headers=HEADERS)
        result = json.loads(resp.text)
        for raw_comment in result['data']['comments']:
            comment = parse_comment(raw_comment)
            print(comment)
            comment_list.append(comment)
    return comment_list


def fetch(page):
    """Fetch one listing page of stores, attach their comments, persist each.

    :param page: 1-based listing page number
    """
    db = MongoDB()
    uuid = get_uuid()
    token = CreatToken(page).get_token()
    params = {
        'cityName': cityName,
        'cateId': type_,
        'areaId': '0',
        'sort': '',
        'dinnerCountAttrId': '',
        'page': page,
        'userId': '',
        'uuid': uuid,
        'platform': '1',
        'partner': '126',
        'originUrl': originUrl + 'pn{}/'.format(page),
        'riskLevel': '1',
        'optimusCode': '1',
        '_token': token
    }
    res = requests.get(base_url, params=params, headers=HEADERS)
    result = json.loads(res.text)
    for item in result['data']['poiInfos']:
        store = parse_store(item)
        # Decomposed: comment pagination now lives in _fetch_comments, which
        # also removes the old inner-loop shadowing of `items`/`item`.
        store['comment'] = _fetch_comments(store['poiId'], store['allCommentNum'])
        print(store)
        db.save(store)
def index():
    """Handle the sake registration form: validate and persist on POST."""
    message = None
    db = MongoDB(app)
    if request.method == 'POST':
        sake_data = check_request_set(request)
        if sake_data:
            db.set_sake(sake_data)
            message = 'Success!'
        else:
            message = 'Error'
    return render_template('index.html', message=message)
def __init__(self):
    """Bind a UDP server socket and initialise hardware/database helpers."""
    self.PORT = 9999
    self.BUFSIZE = 256
    # UDP datagram server listening on all interfaces.
    self.server = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    self.server.bind(('', self.PORT))
    print('[Server] Pengsoo Server Ready!')
    # Peripherals and storage come up only after the socket is bound.
    self.device = Device()
    self.db = MongoDB()
    self.mpu = mpu6050()
    # Filled in once the first client datagram arrives.
    self.targetAddr = ''
def regist():
    """Search sake records by name on POST and render the result list."""
    message = None
    db = MongoDB(app)
    sake_list = []
    if request.method == 'POST':
        sake_name = check_request_get(request)
        if sake_name:
            sake_list = db.get_sake(sake_name)
            message = '%s people found' % len(sake_list)
        else:
            message = 'Error'
    return render_template('search.html', message=message, sake_list=sake_list)
def __init__(self, collector, tab_images):
    """Set up the recognition control loop: collector, config and helpers.

    :param collector: image collector feeding this control loop
    :param tab_images: name of the images table/collection
    """
    super(ImagePornControl, self).__init__()
    self._collector = collector
    self._tab_images = tab_images

    # Both tuning values live in the [image_porn] section of the config.
    def conf_int(key):
        return int(tools.get_conf_value('config.conf', "image_porn", key))

    self._deal_image_count = conf_int("deal_image_count")
    self._interval = conf_int("sleep_time")
    self._db = MongoDB()
    self._image_porn_recg = ImagePornRecg()
def addJob(body):
    """Encrypt credentials, store the job document, and schedule it."""
    # Never persist database credentials in clear text.
    body['dbUsername'] = crypt.encrypt(body['dbUsername'])
    body['dbPassword'] = crypt.encrypt(body['dbPassword'])
    jobs = MongoDB().DB["Jobs"]
    jobid = jobs.insert_one(body).inserted_id
    resp = json.loads(dumps(jobid))
    Scheduler().addJob(jobid)
    return resp, 200
def __init__(self, category):
    """Set up a Saks shoe-category scraper.

    :param category: The category you are searching for, e.g. flats / heels
    :return: None
    """
    self.category = category.lower()
    self.company = 'saks'
    self.base_url = 'http://www.saksfifthavenue.com/Shoes/'
    # Collected page links plus the pagination offset parameter.
    self.all_links = []
    self.params = {'Nao': 0}
    self.mongo = MongoDB(db_name='shoes', table_name=self.category)
def __init__(self, category):
    """Set up a Nordstrom shoe-category scraper.

    :param category: The category you are searching for, e.g. flats / heels
    :return: None
    """
    self.category = category.lower()
    self.company = 'nordstrom'
    self.base_url = 'http://shop.nordstrom.com/c/'
    # Collected page links plus the pagination page parameter.
    self.all_links = []
    self.params = {'page': 1}
    self.mongo = MongoDB(db_name='shoes', table_name=self.category)
def __init__(self, category):
    """Set up a Barneys shoe-category scraper.

    :param category: The category you are searching for, e.g. flats / heels
    :return: None
    """
    self.category = category.lower()
    self.company = 'barneys'
    self.base_url = 'http://www.barneys.com/barneys-new-york/women/shoes/'
    # Collected page links plus the pagination start-offset parameter.
    self.all_links = []
    self.params = {'start': 0}
    self.mongo = MongoDB(db_name='shoes', table_name=self.category)
def _compute_idf(self):
    """Compute tf-idf weights and document norms, persisting them to MongoDB.

    Reads per-document term frequencies from the ``tf_dict`` collection,
    combines them with document frequencies from ``self._reverse_index``,
    and writes the resulting tf-idf dictionary (plus each document's vector
    norm under the ``|doc|`` key) to the ``tf_idf_dict`` collection.
    """
    temp_dict = {}
    self._mongo_session = MongoDB()
    self._mongo_session.connect(host="localhost", port=27017,
                                database="crawler", collection="tf_dict")
    db_tf_results = self._mongo_session.select({})
    for result in db_tf_results:
        for _file, words_dict in result.items():
            if _file not in temp_dict:
                temp_dict[_file] = {}
            doc_norm = 0
            for word, tf in words_dict.items():
                if word in self._reverse_index:
                    # BUGFIX: the smoothing term belongs to the numerator;
                    # the old expression computed N + (0.1 / df), which made
                    # idf essentially independent of document frequency.
                    idf = math.log(
                        (self._number_of_docs + 0.1) /
                        float(len(self._reverse_index[word])), 10)
                    idf = float("{0:.6f}".format(idf))
                    doc_norm += math.pow(tf * idf, 2)
                    temp_dict[_file][word] = {
                        "tf": tf,
                        "idf": idf,
                        "doc": float("{0:.6f}".format(tf * idf))
                    }
            # Euclidean norm of the document's tf-idf vector.
            temp_dict[_file]['|doc|'] = float("{0:.6f}".format(
                math.sqrt(doc_norm)))
    self._mongo_session.connect(host="localhost", port=27017,
                                database="crawler", collection="tf_idf_dict")
    self._mongo_session.insert_document(temp_dict, "tf_idf_dict")
    self._mongo_session.disconnect()
def __init__(self, retry: int = 3, timeout: int = 5):
    """Configure the Zhima proxy source and its MongoDB reader/writer.

    :param retry: request retry count passed to the base class
    :param timeout: request timeout passed to the base class
    :raises RuntimeError: if the "local_rw" configuration item is missing
    """
    super().__init__(retry, timeout)
    self.logger = Logger(folder="zhima")
    # BUGFIX: "&regions=" had been mangled into "®ions=" (HTML-entity
    # corruption of "&reg"); restore the literal query parameter.
    self.url = "http://webapi.http.zhimacangku.com/getip?num=1&type=2&pro=&city=0&yys=0&port=11&time=1&ts=1&ys=0&cs=1&lb=1&sb=0&pb=4&mr=1&regions="
    self.white = "http://web.http.cnapi.cc/index/index/save_white?neek=80313&appkey=1745838ce83ef74c512a3d200585c1b4&white="
    client = MongoDB()
    # The membership check was duplicated verbatim for reader and writer;
    # check once and fail fast, then point both at the same collection.
    if "local_rw" not in client:
        raise RuntimeError(
            "The specified configuration item could not be found.")
    self.reader = client["local_rw"]["proxies"]
    self.writer = client["local_rw"]["proxies"]
async def geting(secret_id: str, code_phrase: str) -> dict:
    """Process the request and query the database for a stored secret.

    Example: /secrets/5eb82d06b893f7227b4f73ff?code_phrase=code_password

    :param secret_id: identifier of the stored secret
    :type secret_id: str
    :param code_phrase: pass-phrase guarding access
    :type code_phrase: str
    :return: decrypted secret or an error
    :rtype: dict
    """
    database = MongoDB()
    secret = database.get_secret(secret_id, code_phrase)
    return {"secret": secret}
async def generating(secret: str, code_phrase: str) -> dict:
    """Process the request and write a new secret record to the database.

    Example: /generate?secret=super_secret_message&code_phrase=code_password

    :param secret: secret message
    :type secret: str
    :param code_phrase: for access control
    :type code_phrase: str
    :return: response with secret_id
    :rtype: dict
    """
    database = MongoDB()
    new_id = database.create_secret(secret, code_phrase)
    return {"secret_id": new_id}
def clear_db_tables(host, port, database, collections):
    """Empty the given MongoDB collections (TF_DICT, etc.) before a run.

    :param host: MongoDB host
    :param port: MongoDB port
    :param database: database name
    :param collections: list of collection names to clear
    :return: None
    """
    session = MongoDB()
    for name in collections:
        # Re-connect per collection so clear() targets the right one.
        session.connect(host=host, port=port, database=database,
                        collection=name)
        session.clear({}, name)
    session.disconnect()
def test_app():
    """Pytest fixture: serve the app against a throwaway test collection."""
    load_dotenv(override=True)
    # Point the app at a dedicated collection so tests never touch real data.
    os.environ['COLLECTION_NAME'] = 'test-todos'
    application = app.create_app()
    # Serve the app from a background daemon thread for the test's duration.
    server = Thread(target=lambda: application.run(use_reloader=False))
    server.daemon = True
    server.start()
    yield application
    # Tear down: give the server a moment to finish, then drop the test data.
    server.join(1)
    MongoDB().get_collection().drop()
def __init__(self, search_term):
    """Set up a Saks search scraper.

    :param search_term: The term you search for, e.g. flats / pumps
    :return: None

    Stores the search term, the Endeca search URL, and a list that will
    collect links to all pages related to the search term.
    """
    self.search_term = search_term
    self.company = 'saks'
    self.params = {'SearchString': self.search_term, 'Nao': 0}
    # Endeca search endpoint, assembled from adjacent literals; the original
    # spaces introduced by line continuation are stripped below.
    self.base_url = (
        'http://www.saksfifthavenue.com/search/EndecaSearch.jsp?'
        'bmForm=endeca_search_form_one&bmFormID=kKYnHcK&bmUID=kKYnHcL&bmIsForm=true'
        '&bmPrevTemplate=%2Fmain%2FSectionPage.jsp&bmText=SearchString&submit-search='
        '&bmSingle=N_Dim&N_Dim=0&bmHidden=Ntk&Ntk=Entire+Site&bmHidden=Ntx'
        '&Ntx=mode%2Bmatchpartialmax&bmHidden=prp8&prp8=t15&bmHidden=prp13&prp13='
        '&bmHidden=sid&sid=14BBCA598131&bmHidden=FOLDER%3C%3Efolder_id&FOLDER%3C%3Efolder_id=')
    self.base_url = self.base_url.replace(' ', '')
    self.all_links = []
    self.mongo = MongoDB(db_name='shoe', table_name=search_term)