def api_economy(self):
    print('Executing api_economy')
    gdp_india = {}
    for record in self.data['records']:
        gdp = dict()
        # extract the yearly GDP value from the record
        gdp['GDP_in_rs_cr'] = int(
            record['gross_domestic_product_in_rs_cr_at_2004_05_prices'])
        gdp_india[record['financial_year']] = gdp
    gdp_india_yrs = list(gdp_india)
    for i in range(1, len(gdp_india_yrs)):
        key = 'GDP_Growth_' + gdp_india_yrs[i]
        # calculate year-over-year GDP growth
        gdp_india[gdp_india_yrs[i]][key] = round(
            ((gdp_india[gdp_india_yrs[i]]['GDP_in_rs_cr'] -
              gdp_india[gdp_india_yrs[i - 1]]['GDP_in_rs_cr']) /
             gdp_india[gdp_india_yrs[i - 1]]['GDP_in_rs_cr']) * 100, 2)
    # convert to a pandas DataFrame; each row pairs a financial year with
    # that year's dict of GDP value and growth
    gdp_india = pd.DataFrame(list(gdp_india.items()),
                             columns=['financial_year', 'gdp_growth'])
    # connect to MongoDB and persist
    mongodb_obj = MongoDB('etluser', 'etluser', 'localhost', 'GDP')
    mongodb_obj.insert_into_db(gdp_india, 'India_GDP')
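# A minimal sketch of the year-over-year growth formula used above, with
# hypothetical toy figures (not real GDP data):
_gdp = {'2011-12': 8736329, '2012-13': 9213017}
_prev, _cur = _gdp['2011-12'], _gdp['2012-13']
assert round((_cur - _prev) / _prev * 100, 2) == 5.46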
def __init__(self, tab_images):
    super(Collector, self).__init__()
    self._lock = threading.RLock()
    self._db = MongoDB()
    self._thread_stop = False
    self._images = []
    self._null_times = 0
    self._read_pos = -1
    self._write_pos = -1
    self._tab_images = tab_images
    self._max_size = int(
        tools.get_conf_value('../config.conf', "image_collector", "max_size"))
    self._interval = int(
        tools.get_conf_value('../config.conf', "image_collector", "sleep_time"))
    self._allowed_null_times = int(
        tools.get_conf_value('../config.conf', "image_collector",
                             'allowed_null_times'))
    self._image_count = int(
        tools.get_conf_value('../config.conf', "image_collector", "images_count"))
    # on startup, reset tasks that were left in DOING back to TODO
    self._db.update(self._tab_images, {'image_pron_status': Constance.DOING},
                    {'image_pron_status': Constance.TODO})
    self._db.set_ensure_index(self._tab_images, 'image_pron_status')
    self._finished_callback = None
class ImagePornControl(threading.Thread):
    def __init__(self, collector, tab_images):
        super(ImagePornControl, self).__init__()
        self._collector = collector
        self._tab_images = tab_images
        self._deal_image_count = int(
            tools.get_conf_value('config.conf', "image_porn", "deal_image_count"))
        self._interval = int(
            tools.get_conf_value('config.conf', "image_porn", "sleep_time"))
        self._db = MongoDB()
        self._image_porn_recg = ImagePornRecg()

    def run(self):
        while True:
            # stop once the collector has finished
            if self._collector.is_finished():
                break
            images = self._collector.get_images(self._deal_image_count)
            for image in images:
                try:
                    image_url = image['image_url']
                    sexy_image_status = []  # recognition result
                    sexy_image_url = []     # url of the corresponding image
                    try:
                        result = self._image_porn_recg.image_predict(image_url)
                    except Exception as e:
                        log.debug(e)
                    else:
                        log.debug('''
                            image_url :%s
                            result    :%d
                            ''' % (image_url, result))
                        sexy_image_status = 1 if result == 6 else 5
                        sexy_image_url = image_url
                    self._db.update(self._tab_images, {'_id': image['_id']}, {
                        'image_pron_status': Constance.DONE,
                        'sexy_image_status': sexy_image_status,
                        'sexy_image_url': sexy_image_url
                    })
                except Exception as e:
                    raise
                    # log.error('table %s has no image_url field' % self._tab_images)
            time.sleep(self._interval)
        self.finished()

    def finished(self):
        self._image_porn_recg.close()
def test_insertar_elemento_incorrecto():
    """Test 6: failed attempt to insert a new pet because one of the
    parameters used to establish the connection is incorrect."""
    nueva_mascota = {
        'id': '1',
        'nombre': 'Simba',
        'tipo_animal': 'cat',
        'raza': 'angora',
        'tamanio': 'small',
        'genero': 'male',
        'edad': 'young',
        'tipo_pelaje': 'short',
        'estado': 'adoptable',
        'ninios': 'no',
        'gatos': 'yes',
        'perros': 'no',
        'ciudad': 'Granada',
        'pais': 'España'
    }
    # Break the database connection on purpose so the insert fails.
    conexion_incorrecta = MongoDB(os.environ.get("MONGODB_URI"),
                                  'PetfinderBD', 'mascotas')
    conexion_incorrecta.coleccion = None
    with pytest.raises(CollectionNotFound):
        assert conexion_incorrecta.insertar_elemento(nueva_mascota)
def extract_and_upload_text_from_images(bucket_name, tiff_documents_list, filing_type):
    print("Length of mini_tiff_documents_list:", len(tiff_documents_list))
    aws_s3_sdk_controller = AwsS3SdkController()
    mini_thread_postgresql_client = PostgreSQLClient()
    mongodb_client = MongoDB()
    for json_object in tiff_documents_list:
        try:
            document_id = json_object.get("document_id")
            # print(json_object.get("tiff_document_name"))
            # print("Document ID:", document_id)
            tiff_document = aws_s3_sdk_controller.download_specific_s3_file(
                bucket_name, json_object.get("tiff_document_name"))
            extracted_string = string_extracton_v3.run_string_extraction(
                tiff_document, filing_type)
            mongodb_client.insert_document_into_database(document_id, extracted_string)
            mini_thread_postgresql_client.update_mysql_document_tracking(document_id)
        except Exception as error:
            print("[ERROR] Tiff File Name:", json_object.get("tiff_document_name"))
            print("[ERROR] Document ID:", document_id)
            print("[ERROR] extract_and_upload_text_from_images", error)
def time(self, update, context):
    # Handles conversation cancellation
    if update.message.text == "/cancel":
        context.bot.send_message(
            update.effective_chat.id,
            "You have stopped scheduling for a reminder.")
        return ConversationHandler.END
    # Check that the message is exactly 4 digits (HHMM)
    message_time = update.message.text
    if len(message_time) != 4 or not message_time.isdigit():
        context.bot.send_message(
            update.effective_chat.id,
            "Please check that you have entered 4 numbers.")
        return self.TIME
    hour = message_time[0:2]
    minute = message_time[2:4]
    # Reject a date/time that is already in the past
    current_date = datetime.now()
    input_date = datetime(int(self.year_val), int(self.month_val),
                          int(self.day_val), int(hour), int(minute))
    if current_date > input_date:
        context.bot.send_message(
            update.effective_chat.id,
            "You have entered a time in the past. Please re-enter the time (24 hours).")
        return self.TIME
    # store hour and minute for display
    self.hour_val = hour
    self.minute_val = minute
    # store the time in memory before writing to the database
    self.time_val = message_time
    reply_message = "Description: {0}\nDate(Day/Month/Year): {1}/{2}/{3}\nTime(hh:mm): {4}:{5}".format(
        self.description_val, self.day_val, self.month_val, self.year_val,
        self.hour_val, self.minute_val)
    context.bot.send_message(update.effective_chat.id, reply_message)
    # Init MongoDB connection and persist the reminder
    db = MongoDB('heroku_mqncqpgt', 'reminders')
    db.insertonedb({
        "chatid": update.message.chat.id,
        "description": self.description_val,
        "date": self.date_val,
        "time": self.time_val
    })
    return ConversationHandler.END
def deal_item(self, data):
    rumors = data["results"]
    mongo = MongoDB(MONGODB_URI, "rumors")
    for rumor in rumors:
        rumor_id = generate_hash("{}{}".format(rumor["title"], rumor["rumorType"]))
        rumor.update({"_id": rumor_id, "source": "丁香园", "agency": "丁香园"})
        if self.url_repeat(rumor_id) is False and mongo.insert(rumor):
            self.update_filter_queue(rumor_id)
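# generate_hash is not shown in this collection; a plausible stand-in
# (assumed, not the project's actual helper) that derives a stable _id
# from title + rumorType so re-crawled rumors collide in MongoDB:
import hashlib

def generate_hash(text):
    return hashlib.md5(text.encode("utf-8")).hexdigest()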
def fetch(page):
    db = MongoDB()
    uuid = get_uuid()
    token = CreatToken(page).get_token()
    params = {
        'cityName': cityName,
        'cateId': type_,
        'areaId': '0',
        'sort': '',
        'dinnerCountAttrId': '',
        'page': page,
        'userId': '',
        'uuid': uuid,
        'platform': '1',
        'partner': '126',
        'originUrl': originUrl + 'pn{}/'.format(page),
        'riskLevel': '1',
        'optimusCode': '1',
        '_token': token
    }
    res = requests.get(base_url, params=params, headers=HEADERS)
    result = json.loads(res.text)
    items = result['data']['poiInfos']
    for item in items:
        store = parse_store(item)
        # print(store)
        # db.save(store)
        poiId = store['poiId']
        commentCount = store['allCommentNum']
        max_page = math.ceil(int(commentCount) / 10)
        comment_list = []
        for offset in range(max_page):
            params = {
                'uuid': get_uuid(),
                'id': poiId,
                'userId': '2490983615',
                'offset': offset * 10,
                'pageSize': '10',
            }
            resp = requests.get(comment_url, params=params, headers=HEADERS)
            # print(resp.text)
            result = json.loads(resp.text)
            comments = result['data']['comments']
            for comment_item in comments:
                comment = parse_comment(comment_item)
                print(comment)
                comment_list.append(comment)
        store['comment'] = comment_list
        print(store)
        db.save(store)
def __init__(self, *args, **kwargs):
    self.location_name = location_name = kwargs.get("location_name")
    self.name = location_name + "_detailed"
    self.location = location = LocationManager().get_location(location_name)
    self.document_ids_ready_for_processing = []
    self.detailed_collection = MongoDB(location.detailedCollectionName)
    self.recent_collection = MongoDB(location.recentCollectionName)
    self.start_urls = [self.next_url()]
    self.proxy = ProxyProvider.provide()
    super().__init__(name=self.name)
    self.logger.info("DetailedItemsSpider initialized")
def __init__(self):
    self.PORT = 9999
    self.BUFSIZE = 256
    self.server = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    self.server.bind(('', self.PORT))
    print('[Server] Pengsoo Server Ready!')
    self.device = Device()
    self.db = MongoDB()
    self.mpu = mpu6050()
    self.targetAddr = ''
def index():
    message = None
    db = MongoDB(app)
    if request.method == 'POST':
        sake_data = check_request_set(request)
        if not sake_data:
            message = 'Error'
        else:
            db.set_sake(sake_data)
            message = 'Success!'
    return render_template('index.html', message=message)
def regist():
    message = None
    db = MongoDB(app)
    sake_list = []
    if request.method == 'POST':
        sake_name = check_request_get(request)
        if not sake_name:
            message = 'Error'
        else:
            sake_list = db.get_sake(sake_name)
            message = '%s results found' % len(sake_list)
    return render_template('search.html', message=message, sake_list=sake_list)
def action():
    is_empty, logs = mysql_operator.get_last_quotas()
    if is_empty:
        write_log("Log is Empty.")
        return
    mongo_operator = MongoDB(user, password, host, port, database)
    for row in logs:
        mongo_operator.save(row['resource'], row['in_use'],
                            row['created'], row['project_id'])
    for row in logs:
        is_saved = mongo_operator.check(row['resource'], row['project_id'],
                                        row['created'], row['in_use'])
        if not is_saved:
            # retry the save once, then log the failure if it still isn't there
            mongo_operator.save(row['resource'], row['in_use'],
                                row['created'], row['project_id'])
            is_saved = mongo_operator.check(row['resource'], row['project_id'],
                                            row['created'], row['in_use'])
            if not is_saved:
                write_log("resource:" + str(row['resource']) +
                          " project_id:" + str(row['project_id']) +
                          " created:" + str(row['created']) +
                          " in_use:" + str(row['in_use']) + " write failed.")
    mysql_operator.clear_old_quotas()
def __init__(self, category):
    """
    :param category: The category you are searching for, e.g. flats/heels
    :return: None

    Define the category, base url and a variable to store the links to all pages.
    """
    self.category = category.lower()
    self.company = 'barneys'
    self.base_url = 'http://www.barneys.com/barneys-new-york/women/shoes/'
    self.all_links = []
    self.params = {'start': 0}
    self.mongo = MongoDB(db_name='shoes', table_name=self.category)
def __init__(self, category):
    """
    :param category: The category you are searching for, e.g. flats/heels
    :return: None

    Define the category, base url and a variable to store the links to all pages.
    """
    self.category = category.lower()
    self.company = 'saks'
    self.base_url = 'http://www.saksfifthavenue.com/Shoes/'
    self.all_links = []
    self.params = {'Nao': 0}
    self.mongo = MongoDB(db_name='shoes', table_name=self.category)
def __init__(self, category):
    """
    :param category: The category you are searching for, e.g. flats/heels
    :return: None

    Define the category, base url and a variable to store the links to all pages.
    """
    self.category = category.lower()
    self.company = 'nordstrom'
    self.base_url = 'http://shop.nordstrom.com/c/'
    self.all_links = []
    self.params = {'page': 1}
    self.mongo = MongoDB(db_name='shoes', table_name=self.category)
def _compute_idf(self):
    """
    Compute idf weights and per-document norms, then store them in MongoDB.
    """
    temp_dict = {}
    self._mongo_session = MongoDB()
    self._mongo_session.connect(host="localhost", port=27017,
                                database="crawler", collection="tf_dict")
    db_tf_results = self._mongo_session.select({})
    for result in db_tf_results:
        for _file, words_dict in result.items():
            if _file not in temp_dict:
                temp_dict[_file] = {}
            doc_norm = 0
            for word, tf in words_dict.items():
                if word in self._reverse_index:
                    # smoothed idf; parenthesized so the smoothing constant
                    # applies to the numerator, not to the document frequency
                    idf = math.log(
                        (self._number_of_docs + 0.1) /
                        float(len(self._reverse_index[word])), 10)
                    idf = float("{0:.6f}".format(idf))
                    doc_norm += math.pow(tf * idf, 2)
                    temp_dict[_file][word] = {
                        "tf": tf,
                        "idf": idf,
                        "doc": float("{0:.6f}".format(tf * idf))
                    }
            temp_dict[_file]['|doc|'] = float("{0:.6f}".format(math.sqrt(doc_norm)))
    self._mongo_session.connect(host="localhost", port=27017,
                                database="crawler", collection="tf_idf_dict")
    self._mongo_session.insert_document(temp_dict, "tf_idf_dict")
    self._mongo_session.disconnect()
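# For reference, the weight computed above (with the fixed parentheses) is a
# smoothed tf-idf; a toy check with hypothetical counts:
import math

N, df, tf = 100, 4, 3                       # corpus size, doc frequency, term frequency
idf = math.log((N + 0.1) / float(df), 10)   # log10(25.025) ~= 1.398372
weight = tf * idf                           # one term's contribution to the doc vector
assert abs(weight - 4.195117) < 1e-4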
def deleteJob(job_id):
    mDB = MongoDB()
    Col = mDB.DB["Jobs"]
    Col.delete_one({"_id": ObjectId(job_id)})
    x = Scheduler()
    x.deleteJob(job_id)
def create_app():
    app = Flask(__name__)
    db = MongoDB()

    @app.route('/')
    def index():
        items = db.get_items()
        item_view_model = ViewModel(items)
        return render_template('index.html', view_model=item_view_model)

    @app.route('/', methods=['POST'])
    def add_item():
        title = request.form['item_title']
        db.add_item(title)
        return redirect(url_for('index'))

    @app.route('/items/<id>', methods=['POST'])
    def mark_item_as_complete(id):
        db.mark_as_complete(id)
        return redirect(url_for('index'))

    @app.route('/items/delete/<id>', methods=['POST'])
    def delete_item(id):
        db.remove_item(id)
        return redirect(url_for('index'))

    return app
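# A hedged usage sketch for the factory above: the test fixture later in this
# collection calls app.create_app(), so the factory is assumed to live in a
# module named app (this entry-point file is hypothetical).
from app import create_app

if __name__ == '__main__':
    create_app().run(host='127.0.0.1', port=5000)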
def write_db(collection_name, db_name, entity_generator):
    """Write entity names and their frequencies to a MongoDB database."""
    mongodb = MongoDB(db_name=db_name)
    col = mongodb.db.get_collection(collection_name)
    for entity_list in entity_generator:
        entities = Counter(list(entity_list))
        item = list({'entity': i[0], 'f': i[1]} for i in entities.most_common())
        for value in item:
            if value['entity']:
                new_entity = search_acronyms(value['entity'])
                new_id = sub(r'[\s-]+', '', str(new_entity).lower())
                # Merge into MongoDB: add to the frequency if the entity
                # already exists, otherwise insert a new document.
                result = col.find_one({'_id': new_id})
                print '.',
                if result:
                    col.update_one({'_id': new_id}, {
                        '$set': {
                            'entity': __compare(new_entity, result['entity']),
                            'f': value['f'] + result['f'],
                        }
                    }, upsert=False)
                else:
                    col.insert_one({
                        '_id': new_id,
                        'entity': new_entity,
                        'f': value['f'],
                    })
    print
    print 'Process completed successfully!!!'
async def generating(secret: str, code_phrase: str) -> dict:
    """
    Processes the request and stores a new secret record in the database.

    Example: /generate?secret=super_secret_message&code_phrase=code_password

    :param secret: secret message
    :type secret: str
    :param code_phrase: for access control
    :type code_phrase: str
    :return: response with secret_id
    :rtype: dict
    """
    db = MongoDB()
    return {"secret_id": db.create_secret(secret, code_phrase)}
async def geting(secret_id: str, code_phrase: str) -> dict:
    """
    Processes the request and reads the secret from the database.

    Example: /secrets/5eb82d06b893f7227b4f73ff?code_phrase=code_password

    :param secret_id: id of the stored secret
    :type secret_id: str
    :param code_phrase: for access control
    :type code_phrase: str
    :return: decrypted secret or an error
    :rtype: dict
    """
    db = MongoDB()
    return {"secret": db.get_secret(secret_id, code_phrase)}
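# The two async handlers above read like FastAPI path operations; a sketch of
# how they might be wired up (the app object and the route paths are
# assumptions taken from the docstring examples, not shown in the source):
from fastapi import FastAPI

app = FastAPI()
app.get("/generate")(generating)
app.get("/secrets/{secret_id}")(geting)

# Exercised with FastAPI's test client:
# from fastapi.testclient import TestClient
# client = TestClient(app)
# client.get("/generate", params={"secret": "msg", "code_phrase": "pw"}).json()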
def __init__(self, *args, **kwargs):
    self.name = kwargs.get("location_name") + "_recent"
    delta_timestamp = datetime.now() - timedelta(minutes=3)
    self.last_stamp = int(datetime.timestamp(delta_timestamp))
    self.page = 1
    self.location = location = LocationManager().get_location(kwargs.get("location_name"))
    self.recent_collection = MongoDB(location.recentCollectionName)
    self.detailed_collection = MongoDB(location.detailedCollectionName)
    self.url_pattern = (
        'https://m.avito.ru/api/9/items?key={key}&sort={sort}'
        '&locationId={location_id}&page=__page__&lastStamp=__timestamp__'
        '&display={display}&limit={limit}'
    ).format(key=API_KEY, sort='date', location_id=location.id,
             display='list', limit=99)
    self.start_urls = [self.next_url()]
    self.proxy = ProxyProvider.provide()
    super().__init__(name=self.name)
def __WriteDict(dic, collection, FieldK, FieldV):
    db = MongoDB.getConnection('mining')
    docs = []
    for k, v in dic.iteritems():
        doc = {}
        doc[FieldK] = k
        doc[FieldV] = v
        docs.append(doc)
    db[collection].insert(docs)
def listJob():
    mDB = MongoDB()
    Col = mDB.DB["Jobs"]
    jobs = Col.find()
    resp = json.loads(dumps(jobs))
    return resp, 200
def test_app():
    load_dotenv(override=True)
    # Point the app at a dedicated test collection
    os.environ['COLLECTION_NAME'] = 'test-todos'
    # Construct the new application
    application = app.create_app()
    # Start the app in its own thread
    thread = Thread(target=lambda: application.run(use_reloader=False))
    thread.daemon = True
    thread.start()
    yield application
    # Tear down: stop the thread and drop the test collection
    thread.join(1)
    mongodb = MongoDB()
    mongodb.get_collection().drop()
def __ReadDict(dic, collection, FieldK, FieldV):
    db = MongoDB.getConnection('mining')
    for doc in db[collection].find():
        k = doc[FieldK]
        v = doc[FieldV]
        if FieldK == "term":
            k = k.encode('utf-8')
        if FieldV == "term":
            v = v.encode('utf-8')
        dic[k] = v
def __init__(self, search_term):
    """
    :param search_term: The term you search for, e.g. flats / pumps
    :return: None

    Define the search term, base url and a variable to store the links
    to all the pages related to the search term.
    """
    self.search_term = search_term
    self.company = 'saks'
    self.params = {'SearchString': self.search_term, 'Nao': 0}
    self.base_url = ('http://www.saksfifthavenue.com/search/EndecaSearch.jsp?'
                     'bmForm=endeca_search_form_one&bmFormID=kKYnHcK&bmUID=kKYnHcL&bmIsForm=true'
                     '&bmPrevTemplate=%2Fmain%2FSectionPage.jsp&bmText=SearchString&submit-search='
                     '&bmSingle=N_Dim&N_Dim=0&bmHidden=Ntk&Ntk=Entire+Site&bmHidden=Ntx'
                     '&Ntx=mode%2Bmatchpartialmax&bmHidden=prp8&prp8=t15&bmHidden=prp13&prp13='
                     '&bmHidden=sid&sid=14BBCA598131&bmHidden=FOLDER%3C%3Efolder_id&FOLDER%3C%3Efolder_id=')
    self.all_links = []
    self.mongo = MongoDB(db_name='shoe', table_name=search_term)
def __init__(self, config, nodeName, loadFromDB=False):
    self.node = config.GetChild(nodeName)
    self.trained = loadFromDB
    GlobalInfo.Init(config, "__global__", loadFromDB)
    # get the data source
    data_source = self.node.GetChild("data_source")
    dbname = data_source.GetChild('db').GetValue()
    self.collection = data_source.GetChild('collection').GetValue()
    self.field = data_source.GetChild('field').GetValue()
    #self.field = self.field.encode('utf-8')
    self.db = MongoDB.getConnection(dbname)
def insertBookIntoDB(bookPath, bookInfo, bookTitle, toc, indexdict, logger):
    bookdict = {}
    #item['author'] = item['author'].encode('utf-8', 'strict')
    bookdict['title'] = bookTitle.encode('utf-8', 'ignore')
    bookdict['path'] = bookPath['pdf']
    bookdict['name'] = bookPath['name']
    bookdict['toc'] = toc
    bookdict['indexkeywords'] = indexdict
    #bookdict['bookinfo'] = bookInfo
    dbInstance = MongoDB()
    result, id = dbInstance.insertABook(GLOBAL_CONSTANTS.CollectionName, bookdict)
    print "Insert book status : ", result, GLOBAL_CONSTANTS.CollectionName
    logger.writeLine("Book inserted into the database : " + str(result) + " " +
                     bookdict['name'] + " " + GLOBAL_CONSTANTS.CollectionName)
    return
def check_schedules():
    print(schedulecheck.updater)
    db = MongoDB('heroku_mqncqpgt', 'reminders')
    # Get all reminders for today
    current_date = datetime.now().date().strftime('%d%m%Y')
    query = {"date": current_date}
    # Get the current time
    current_time = datetime.now().strftime("%H%M")
    # Loop through today's reminders and check the time
    for element in db.finddb(query):
        # separate the date str
        # day = element["date"][0:2]
        # month = element["date"][2:4]
        # year = element["date"][4:8]
        # separate the time str
        hour = int(element["time"][0:2])
        minute = int(element["time"][2:4])
        # Convert hour and minute into an HHMM string for comparison
        remindertime = datetime.now().replace(hour=hour, minute=minute).strftime("%H%M")
        # If remindertime is due, send the reminder and delete it from the DB
        if remindertime <= current_time:
            chatid = element["chatid"]
            messagestr = "Reminder: {0}".format(element["description"])
            schedulecheck.updater.bot.send_message(chatid, messagestr)
            # Convert the object id str into an ObjectId
            objectid = ObjectId(str(element["_id"]))
            # Delete from db
            query = {"_id": objectid}
            db.deleteonedb(query)
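# Note on the comparison above: "%H%M" strings are zero-padded, so plain
# string comparison orders them the same way as the times themselves.
assert "0905" <= "1430"
assert not ("2310" <= "0907")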
class TweetDB():
    def __init__(self):
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.mongo = MongoDB(self.ds.db, self.ds.collection)
        self.tweet = ""
        self.tokens = ""
        self.i = 0
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        where = {
            "text": {"$exists": "true"},
            "geo.coordinates": {"$exists": "true"}
        }
        select = {"text": 1, "source": 1, "geo": 1,
                  "user": 1, "retweet_count": 1, "created_at": 1}
        results = self.mongo.find(where, select)
        return results

    def process_tweets(self):
        tweets = self.get_tweet_from_db()
        for rawTweet in tweets:
            if "text" in rawTweet:
                tokens = {}
                self.ptext.set_tweet_text(rawTweet['text'])
                self.ptext.set_tweet_source(rawTweet['source'])
                self.ptext.process_text()
                rawTweet['source'] = self.ptext.get_tweet_source()
                rawTweet['text'] = self.ptext.get_tweet_text()
                self.tokens = self.ptext.get_tweet_tokens()
                tokens['tokens'] = self.tokens
                rawTweet.update(tokens)
                self.tweet = self.cleaner.unset_tweet_keys(rawTweet)
                if not self.ptext.get_translate_status():
                    self.ds.output_tweet(self.tweet)
                    self.i += 1
                else:
                    if self.translation_store:
                        if self.enable_translation:
                            if not self.ptext.get_translate_failed():
                                self.ds.output_tweet(self.tweet)
                                self.i += 1
                        else:
                            self.ds.output_tweet(self.tweet)
                            self.i += 1

    def get_tweet_count(self):
        return self.i
def __init__(self, config, nodeName, loadFromFile=False):
    self.curNode = config.GetChild(nodeName)
    self.rate = float(self.curNode.GetChild("rate").GetValue())
    self.method = self.curNode.GetChild("method").GetValue()
    self.modelPath = self.curNode.GetChild("model_path").GetValue()
    self.people_tag_collection = self.curNode.GetChild('people_tag').GetValue()
    self.blackList = {}
    dbname = self.curNode.GetChild("db").GetValue()
    self.db = MongoDB.getConnection(dbname)
    self.trained = loadFromFile
    if loadFromFile:
        f = open(self.modelPath, "r")
        for line in f:
            self.blackList[int(line)] = 1
class MongoDBUT(unittest.TestCase):
    def setUp(self):
        test_config = TestConfig()
        self.data_path = test_config.get_data_path()
        config = Configuration(os.path.join(self.data_path, 'build.ini'))
        db_name = config.get('mongodb1', 'db_name')
        host = config.get('mongodb1', 'host')
        config = MongoDBConfig(db_name, host)
        self.db = MongoDB(config)

    def testInsert(self):
        hobby = ['AA', 'BB', 'CC']
        p1 = People('dustin', 34, hobby)
        object_id = self.db.insert(p1.__dict__, 'people')
        # print object_id
        people = self.db.findOne({"_id": object_id}, 'people')
        # print people
        self.assertEquals(34, people['_age'], 'age should be 34')
        self.assertEquals(['AA', 'BB', 'CC'], people['_hobby'],
                          '_hobby should be AA,BB,CC')

    def tearDown(self):
        """Delete seed data from the testing database."""
        self.db.removeAll('people')
def quotas_usage(request, project_id, resource):
    token = request.META.get('HTTP_X_AUTH_TOKEN')
    mongodb_info = setting.mongodb_info
    host = mongodb_info['host']
    user = mongodb_info['user']
    password = mongodb_info['password']
    port = mongodb_info['port']
    database = mongodb_info['database']
    m = MongoDB(user, password, host, port, database)
    v = Verify()
    v.set_request(KEY_STONE_HOST['host'], KEY_STONE_HOST['port'])
    v.set_tenantname(project_id)
    if v.is_token_available(token):
        start_time = int(request.GET.get('start_time'))
        end_time = int(request.GET.get('end_time'))
        response = m.load(resource, project_id, start_time, end_time)
        response_json = json.dumps(response)
        return HttpResponse(response_json, content_type="application/json")
    else:
        return HttpResponse(v.get_request_data())
# The opening `def` line and the first help entries were cut off in this
# snippet; the wrapper is assumed from the parse_args() call in __main__,
# and the 'mon_gen'/'port' descriptions mirror the companion mon_key script.
def parse_args():
    help = {
        'mon_gen': 'generate synapses in mongodb',
        'count': 'number of synapses to generate',
        'port': 'port for the database',
    }
    parser = argparse.ArgumentParser(help['mon_gen'])
    parser.add_argument('count', type=int, help=help['count'])
    parser.add_argument('-p', type=int, default=27017, help=help['port'])
    # Return parsed dictionary
    return vars(parser.parse_args())


if __name__ == '__main__':
    # Get argument dictionary
    argd = parse_args()
    # Make the database
    db = MongoDB(argd['p'])
    # Clear database
    db.reset()
    print('cleared database')
    # Generate the entries
    t0 = time.time()
    all_entries = generate_synapses(argd['count'])
    t1 = time.time()
    print('generated synapses in {:.2f} sec'.format(t1 - t0))
    # Add all entries
    db.add_points(all_entries)
    t2 = time.time()
    print('wrote db in {:.2f} sec'.format(t2 - t1))
# RSSReader().read_rss('http://glavnoe.ua/rss/newsall.xml', 'glavnoe.ua')
# RSSReader().read_rss('http://glavcom.ua/rss.xml', 'glavcom.ua')
# RSSReader().read_rss('http://www.unn.com.ua/rss/news_uk.xml', 'unn.com.ua')
# RSSReader().read_rss('http://joinfo.ua/rss/main.xml', 'joinfo.ua')
# RSSReader().read_rss('http://focus.ua/modules/rss.php', 'focus.ua')
# RSSReader().read_rss('http://comments.ua/export/rss_ru.xml', 'comments.ua')

data = RSSReader().read_rss('http://focus.ua/modules/rss.php', 'focus.ua')

# print '\n\ndata_received!!!\n\n'
# for i in data:
#     # print "Original:".encode('utf8') + str(i[2].encode('utf8'))
#     print ExtraMethods().remove_tags(i[2].encode('utf8'))
#     print '\n\n'

database = MongoDB()
for item in data:
    db_query = database.makeArticleInfoQuery(item)
    if database.verifyQueryIsReady(db_query):
        database.writeArticleInfo(db_query)
    else:
        print 'Error item#' + str(data.index(item))

database.printDB()

# need to resolve decode issue with http://galinfo.com.ua/rss/export.rss
class DataSet():
    def __init__(self, conf):
        self.db_engine = conf.database.engine
        self.db = conf.database.db
        self.collection = conf.database.collection
        self.counter = 0
        self.location = ""
        self.geo = conf.geo.geo
        self.writefile = conf.output.write
        self.geowrite = conf.geo.write
        self.userwrite = conf.user.write
        self.tweetwrite = conf.tweet.write
        self.wordswrite = conf.words.write
        if self.geo:
            self.geoEngine = conf.geo.engine
            if self.geoEngine == "google":
                self.googleLimit = conf.geo.limit
        self.usetime = conf.output.filenamewithdate
        self.outdir = conf.output.directory
        if self.outdir:
            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir)
        self.tweetfilename = conf.tweet.filename
        self.geofilename = conf.geo.filename
        self.userfilename = conf.user.filename
        self.wordsfilename = conf.words.filename
        if self.usetime:
            time = datetime.now()
            time = time.strftime("%Y-%m-%d")
            self.outdir += "%s/" % (time)
            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir)
        self.tweetfields = conf.tweet.fields.split(",")
        self.userfields = conf.user.fields.split(",")
        self.format = conf.output.format
        self.out_tweets_file = "%s%s.%s" % (self.outdir, self.tweetfilename, self.format)
        self.out_geo_file = "%s%s.%s" % (self.outdir, self.geofilename, self.format)
        self.out_user_file = "%s%s.%s" % (self.outdir, self.userfilename, self.format)
        self.out_words_file = "%s%s.%s" % (self.outdir, self.wordsfilename, self.format)
        self.store = conf.database.store
        if self.store:
            try:
                if self.db_engine == "mongo":
                    self.mongo = MongoDB(self.db, self.collection)
                else:
                    print "Currently only MongoDB is supported for storing tweets.\nContact [email protected] for additional support"
                    print "==================================================\n"
                    self.mongo = MongoDB(self.db, self.collection)
            except:
                print "Couldn't find database driver. Storing option disabled"
                print "==================================================\n"
                self.store = False

    def output_tweet(self, tweet):
        if self.writefile:
            self.output_tweets_in_file(tweet)
        if self.store:
            self.output_db(tweet)

    def output_tweets_in_file(self, result):
        keywords = ""
        points = ""
        # user and tweet are initialized here so the checks at the end of
        # this method don't fail when the corresponding write flags are off
        user = ""
        tweet = ""
        words = True
        uid = result['user']['id']
        if self.geo:
            if self.geowrite:
                if not type(result["geo"]).__name__ == 'NoneType':
                    lat = 0.00
                    long = 0.00
                    while len(result["geo"]["coordinates"]) != 0:
                        if len(result["geo"]["coordinates"]) == 2:
                            lat = str(result["geo"]["coordinates"].pop(0))
                        else:
                            long = str(result["geo"]["coordinates"].pop(0))
                    points = "%s %s" % (lat, long)
        if self.userwrite:
            if self.userfields:
                i = 0
                ttl = len(self.userfields)
                for field in self.userfields:
                    if field == 'screenname' or field == 'name':
                        user += "\"%s\"" % result["user"][field].encode("UTF-8")
                    elif field == 'description' or field == 'time_zone':
                        user += "\"%s\"" % utils.clean(result["user"][field].encode("ASCII", "ignore"))
                    elif field == 'created_at':
                        created = result["user"][field].encode("UTF-8")
                        date = parse(created)
                        created = date.strftime("%Y-%m-%d %H:%M:%S")
                        user += "\"%s\"" % created
                    elif not field == 'id':
                        if result["user"][field] != "":
                            user += "%s" % result["user"][field]
                        else:
                            user += "0"
                    if i < ttl:
                        user += ","
                    i += 1
        if self.tweetwrite:
            if self.tweetfields:
                i = 0
                ttl = len(self.tweetfields)
                for field in self.tweetfields:
                    if field == 'source':
                        source = utils.parse_alink(result[field])
                        tweet += "\"%s\"" % source.encode("UTF-8")
                    elif field == 'created_at':
                        created = result[field].encode("UTF-8")
                        date = parse(created)
                        created = date.strftime("%Y-%m-%d %H:%M:%S")
                        tweet += "\"%s\"" % created
                    elif field == 'tokens':
                        list(result[field]).sort()
                        for token in result[field]:
                            keywords += token.lower() + " "
                        keywords = keywords.rstrip().encode("UTF-8")
                        if keywords == "":
                            words = False
                        tweet += "\"%s\"" % keywords
                    elif field == 'text':
                        text = utils.clean(result[field])
                        tweet += "\"%s\"" % text.encode("UTF-8")
                    elif field == 'retweet_count':
                        tweet += "\"%s\"" % str(result[field]).encode("UTF-8").replace("+", "")
                    i += 1
                    if i < ttl:
                        tweet += ","
        if points != "":
            if keywords == "":
                field = "tokens"
                list(result[field]).sort()
                for token in result[field]:
                    keywords += token.lower() + " "
                keywords = keywords.rstrip().encode("UTF-8")
            geo_data = "%s,\"%s\",\"%s\"" % (uid, points, keywords)
            self.output_data_file(self.out_geo_file, geo_data)
        if user != "":
            user_data = "%s,%s" % (uid, user)
            self.output_data_file(self.out_user_file, user_data)
        if tweet != "":
            tweets_data = "%s,%s" % (uid, tweet)
            self.output_data_file(self.out_tweets_file, tweets_data)
        if self.wordswrite:
            if words:
                if keywords == "":
                    field = "tokens"
                    list(result[field]).sort()
                    for token in result[field]:
                        keywords += token.lower() + " "
                    keywords = keywords.rstrip().encode("UTF-8")
                words_data = "\"%s\"" % keywords
                self.output_data_file(self.out_words_file, words_data)

    def output_data_file(self, filename, data):
        if self.format == 'arff':
            #self.output_arff(filename, data)
            print "arff output currently not supported"
        if self.format == 'txt':
            self.output_txt(filename, data)

    def output_arff(self, filename, data):
        if not os.path.exists(filename):
            header = '''
% Title: TweetStream Dataset
%
% Sources:
%   (a) Creator: M. Fazle Taher
%   (b) Email: [email protected]
%
@relation tweet-stream

@attribute tweet string
@attribute source string

@data
'''
            with open(filename, "w") as fp:
                fp.write(header)
        with open(filename, "ab") as fp:
            fp.writelines("%s\n" % data.strip())

    def output_txt(self, filename, data):
        with open(filename, "a") as fp:
            fp.writelines("%s\n" % (data))

    def output_db(self, tweet):
        if self.store:
            self.mongo.insert(tweet)

    def get_region_country_from_points(self, latitude, longitude):
        if self.counter < self.googleLimit:
            gcoder = Geocoder()
            results = gcoder.reverse_geocode(latitude, longitude)
            region = results.administrative_area_level_1__short_name
            country = results.country
            self.counter += 1
            self.location = "%s %s" % (region, country)
def makeTotalSearch():
    data_base = MongoDB()
    return data_base.findElements('article_info')
def makeSearchByWord(word):
    data_base = MongoDB()
    return data_base.findElemetByWord('article_info', word)
def __init__(self):
    MongoDB.__init__(self)
# The enclosing `def` line was cut off in this snippet; the wrapper is
# assumed from the parse_args() call in __main__ below.
def parse_args():
    help = {
        'mon_key': 'read single value from mongodb',
        'key': 'integer synapse id to read',
        'port': 'port for the database',
    }
    parser = argparse.ArgumentParser(help['mon_key'])
    parser.add_argument('key', type=int, help=help['key'])
    parser.add_argument('-p', type=int, default=27017, help=help['port'])
    # Return parsed dictionary
    return vars(parser.parse_args())


if __name__ == '__main__':
    # Get argument dictionary
    argd = parse_args()
    # Make the database
    key = argd['key']
    db = MongoDB(argd['p'])
    # Start timing the mongo lookup
    t0 = time.time()
    # Read the value from the mongo db
    in_bounds = db.check_key(key)
    t1 = time.time()
    # Print the time taken to check bounds
    print("{} in {:.2f} seconds".format(in_bounds, t1 - t0))
doc = doc.decode("gbk").encode("utf-8") except: page_id += process_num continue page_id += process_num soup = BeautifulSoup(doc) word = soup.find('h1', "title") if word: #baike.append({'title':word.string, 'url':url, 'html':doc}) #if not db.word_dic.find_one({'word':word.string}): words.append({'word':word.string, 'len':len(word.string)}) matchs = soup.findAll(href=re.compile('^/view/\d+.htm')) for match in matchs: #if match.string: if match.string and not db.word_dic.find_one({'word':match.string}): words.append({'word':match.string, 'len':len(match.string)}) if len(words) >= 10: db.word_dic.insert(words) words = [] #db.baike.insert(baike) #baike = [] if __name__=="__main__": db = MongoDB.getConnection('mining') process_num = 1 startindex = 1 for i in range(startindex, process_num+startindex): p = Process(target=son,args=(process_num, i, db)) p.start()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from mongodb import MongoDB

MongoDB.connectDB('local')
MongoDB.connectDB('mining')
MongoDB.connectDB('recommend')
# Finally, let's store all documents into MongoDB
from question import Question
from answer import Answer
from mongodb import MongoDB
from saveImage import save_img, save_text
from urllib import request, error

q = Question("https://www.zhihu.com/question/39547745")
q.setting()
answer_list = q.get_answer_id_list()
#q_info = 'Title : ' + q.title + '\nQuestion url : ' + q.qurl + '\nQuestion id : ' + \
#    str(q.qid) + '\nAsker : ' + q.asker + '\nNumber of Follows : ' + str(q.num_of_follows) + \
#    '\nNumber of Comments : ' + str(q.num_of_comments) + '\nNumber of Answers : ' + str(q.num_of_answers)
#save_text('./%s/' % str(q.qid), str(q.qid) + '_info', q_info)

mongo = MongoDB(str(q.qid), False)  # initialization also connects to the MongoDB server
'''
mongo.insertData('Questions', {
    'title': q.title,
    'question_url': q.qurl,
    'question_id': q.qid,
    'asker': q.asker,
    'num_of_follows': q.num_of_follows,
    'num_of_comments': q.num_of_comments,
    'num_of_answers': q.num_of_answers,
    'answers_list': answer_list,
})

# Insert all data
for answer_id in answer_list:
    a = Answer(answer_id)
    a.setting()