class DatabaseCache:
    def __init__(self, database_name="Matsuo", collection_name="ImageCache",
                 expiration_time=datetime.timedelta(minutes=10)):
        self.client = MongoClient()[database_name][collection_name]
        self.expiration_delta = expiration_time
        if self.expiration_delta is not None:
            # TTL index: documents are removed once the indexed date has passed.
            self.client.create_index(
                [(DatabaseCacheItem.expiration_index, pymongo.ASCENDING)],
                expireAfterSeconds=0)
            # Note: this text index is declared on the expiration field; it most
            # likely belongs on the filename field used for the lookups below.
            self.client.create_index([(DatabaseCacheItem.expiration_index, pymongo.TEXT)])

    def add_item(self, key, data):
        # TTL comparisons are done against UTC, so store a UTC timestamp.
        item = DatabaseCacheItem(
            key, bson.binary.Binary(data.read()),
            datetime.datetime.utcnow() + self.expiration_delta)
        self.client.update_one(item.get_id(), update=item.get_update_form(), upsert=True)

    def get_item(self, key):
        element = self.client.find_one({DatabaseCacheItem.filename_key: key})
        return element[DatabaseCacheItem.data_key] if element else None

    def remove_item(self, key):
        self.client.delete_many({DatabaseCacheItem.filename_key: key})
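# The cache above delegates field names and the update document to a
# DatabaseCacheItem helper that is not shown in this snippet. The sketch below
# is only a plausible reconstruction: the concrete field names ("filename",
# "data", "expires_at") are assumptions inferred from how DatabaseCache calls it.
class DatabaseCacheItem:
    filename_key = "filename"
    data_key = "data"
    expiration_index = "expires_at"

    def __init__(self, filename, data, expires_at):
        self.filename = filename
        self.data = data
        self.expires_at = expires_at

    def get_id(self):
        # Filter document used by update_one(..., upsert=True) in DatabaseCache.add_item.
        return {self.filename_key: self.filename}

    def get_update_form(self):
        return {"$set": {
            self.data_key: self.data,
            self.expiration_index: self.expires_at,
        }}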
def init_id():
    id_doc = {"type": "object_id_file", "object_id": 0}
    mongodb_url = current_app.config['MONGO_URL']
    collection = MongoClient(mongodb_url).stats.swift
    # Only seed the counter document once; the unique index guards against duplicates.
    if collection.find_one({"type": "object_id_file"}) is None:
        collection.insert_one(id_doc)
        collection.create_index("type", unique=True)
def init_id(mongodb_url):
    # Run this only once: it seeds the object-id counter document.
    id_doc = {"type": "object_id_file", "object_id": 0}
    collection = MongoClient(mongodb_url).stats.swift
    if collection.find_one({"type": "object_id_file"}) is None:
        collection.insert_one(id_doc)
        collection.create_index("type", unique=True)
class TvrainData:
    def __init__(self):
        """ Just load data from Mongo. """
        self.sequences = MongoClient(os.environ['MONGODB_URL']).tvrain.sequences
        self.collection = MongoClient(os.environ['MONGODB_URL']).tvrain.articles
        self.collection.create_index("time")

    def get_random_articles(self, n):
        """Return N topics for index.html"""
        # Note: Collection.count() is deprecated in newer pymongo releases;
        # count_documents({}) is the modern equivalent.
        articles = self.collection.find().sort("time", 1).skip(
            random.randint(0, self.collection.count())).limit(n)
        return list(articles)

    def get_article_id(self, url):
        """Get id by url"""
        return self.collection.find_one({'url': url})['_id']

    def get_articles_data(self, articles_urls):
        """
        Get data from MongoDB for article urls
        :param articles_urls: ['article_url', ...]
        :return: list of MongoDB documents
        """
        articles = []
        for url in articles_urls:
            articles.append(self.collection.find_one({'url': url}))
        return articles

    def iterate_articles(self, except_articles, skip=0, limit=None, query=None):
        """
        Iterate through all articles, skipping the ids listed in except_articles
        :param except_articles: list of ids
        :return:
        """
        if query is None:
            query = {}
        if limit is None:
            data = self.collection.find(query).skip(skip)
        else:
            data = self.collection.find(query).skip(skip).limit(limit)
        for value in data:
            if value['_id'] not in except_articles:
                yield value

    def get_sequences(self):
        """Return all sequences for train"""
        return list(self.sequences.find().limit(-1))
def add(file, client, db, cell, primkey):
    '''Load a data cell.

    An alternative primary key can be specified to insert documents. This is
    useful when the data cell comes from a collaborator who uses a different
    set of UUIDs than we do; in that case the identifiers do not indicate
    whether an entry is a duplicate.

    Example:

    \b
    $ zoo add --client localhost:27017 --db zika --cell t5 zoo/data/cell_a.json
    Loading data cell.
    3 documents inserted in collection t5.
    0 duplicates skipped.
    Done.

    \b
    $ zoo add --db zika --cell t5 --primkey genbank.a zoo/data/cell_b.json
    Loading data cell.
    Index created on field "genbank.a".
    1 documents inserted in collection t5.
    3 duplicates skipped.
    Done.
    '''
    click.echo('Loading data cell.')
    c = MongoClient(client)[db][cell]
    inserted = 0
    duplicates = 0
    if primkey == '_id':
        for line in file:
            try:
                c.insert_one(json.loads(line.strip()))
                inserted += 1
            except DuplicateKeyError:
                duplicates += 1
    else:
        # index primkey if it does not exist yet (the index is named after the key)
        if primkey not in c.index_information():
            c.create_index(primkey, unique=True, name=primkey)
            print('Index created on field', '"' + primkey + '".')
        for line in file:
            d = json.loads(line.strip())
            if c.find_one({primkey: deep_get(d, primkey)}):  # duplicate found
                duplicates += 1
            else:
                c.insert_one(d)
                inserted += 1
    print(inserted, 'documents inserted in cell', '"' + cell + '".')
    if duplicates > 0:
        print(duplicates, 'duplicates skipped.\nDone.')
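# The add() command above resolves dotted primary-key paths such as "genbank.a"
# with a deep_get helper that is not included in this snippet. The following is
# only a plausible sketch of such a helper, not the original implementation.
from functools import reduce


def deep_get(document, dotted_key, default=None):
    """Return the value at a dotted path like 'genbank.a' inside a nested dict."""
    try:
        return reduce(lambda d, k: d[k], dotted_key.split('.'), document)
    except (KeyError, TypeError):
        return default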
lines = list()
threads = []
i = 0
with open(options.log) as fileobject:
    for index, line in enumerate(fileobject, 1):
        lines.append(line)
        if index % int(float(options.linesPerThread)) == 0 or index == num_lines:
            # Busy-wait until a worker slot frees up (activeWorkers is presumably
            # decremented by the formatLine workers when they finish).
            while str(activeWorkers) == str(options.threads):
                pass
            activeWorkers += 1
            t = threading.Thread(target=formatLine, args=(lines, i,))
            i += int(options.linesPerThread)
            threads.append(t)
            t.start()
            lines = list()
        progressBarObj.update(index)

MongoDB.create_index("index")
for thread in threads:
    thread.join()
progressBarObj.finish()
# -*- coding: utf-8 -*- import time import chardet import pymongo from config import MONGO_HOST from motor.motor_asyncio import AsyncIOMotorClient from pymongo import MongoClient from pymongo.errors import DuplicateKeyError from maga import proper_infohash torrents_async = AsyncIOMotorClient(MONGO_HOST).spz.torrents torrents = MongoClient(MONGO_HOST).spz.torrents torrents.create_index([('infohash', pymongo.ASCENDING)], unique=True) torrents.create_index([('title', "text")]) def guess_and_decode(bytes): result = chardet.detect(bytes) try: if result.get("confidence", 0) > 0.8: return bytes.decode(result["encoding"]) return bytes.decode("GB18030") except: return def bytes_to_str(b): try: if isinstance(b, bytes): return b.decode()
# -*- coding: utf-8 -*-
"""
Create MongoDB indexes for this example.

Author: Fabio Pani <fabiux AT fabiopani DOT com>
License: see LICENSE
"""
from pymongo import MongoClient, DESCENDING, TEXT

if __name__ == '__main__':
    articles = MongoClient().reddit.articles
    articles.create_index('subreddit_id')
    articles.create_index('id_reddit', unique=True)
    articles.create_index([('created_utc', DESCENDING)])
    articles.create_index([('title', TEXT)])
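# A short, hedged example of the queries the indexes above are meant to serve.
# The subreddit id and search term are illustrative values, not taken from the
# original script.
from pymongo import MongoClient, DESCENDING

articles = MongoClient().reddit.articles

# Equality filter on subreddit_id (indexed), newest first via created_utc (indexed).
latest = articles.find({'subreddit_id': 't5_2qh1i'}).sort('created_utc', DESCENDING).limit(10)

# Full-text search backed by the text index on title.
matches = articles.find({'$text': {'$search': 'python'}}).limit(10)

for doc in latest:
    print(doc.get('id_reddit'))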
csv_file = csv.DictReader(open('sidewalk-cafes.csv')) n_lines = 0 batch = [] for line in csv_file: location_field = line.pop('Location 1') match = location_pat.match(location_field) assert match, repr(location_field) group_dict = match.groupdict() lon, lat = float(group_dict['lon']), float(group_dict['lat']) line['location'] = SON([ ('type', 'Point'), ('coordinates', [lon, lat])]) batch.append(line) n_lines += 1 if not n_lines % 100: collection.insert(batch) batch = [] sys.stdout.write('.') sys.stdout.flush() # Final documents. if batch: collection.insert(batch) print('') print('Inserted %s documents.' % n_lines) print('Creating 2dsphere index.') collection.create_index([('location', '2dsphere')]) print('Done.')
from league import keys_league from services import client from amazon.dynamo import EntityHistory, Connection from league.services.shared import SharedPath from autobahn.twisted.websocket import WebSocketClientProtocol from twisted.web import server, resource from twisted.internet import threads ranking_created = 'ranking_created' ranking_list = 'ranking_list' ranking_collection = MongoClient().test.ranking ranking_collection.create_index(ranking_created, expireAfterSeconds=3600 * 24) ranking_query = { 'query_filter': { 'rank__add__null': False, 'rank__change__null': False, 'rank__remove__null': False }, 'conditional_operator': 'OR', 'attributes': ('rank__add', 'rank__change', 'rank__remove', 'ts_add'), 'reverse': True } def create_rankings(league_name, league_profile): return [
class mongodb():
    """
    {
        "push": 'collection.update_one(query, {"$push": {f"{locator}": value}})',   # array operation only
        "pull": 'collection.update_one(query, {"$pull": {f"{locator}": value}})',   # array operation only
        "unset": 'collection.update_one(query, {"$unset": {f"{locator}": value}})', # object/dictionary operation only
        "pop": 'collection.update_one(query, {"$pop": {f"{locator}": value}})',
        "set": 'collection.update_one(query, {"$set": {f"{locator}": value}})',
        "set_all": 'collection.update_one(query, {"$set": value})',
        "push_all": 'collection.update_one(query, {"$push": value})',
        "add": 'collection.insert_one(value)',
        "get": 'collection.find_one(query)',
        "filter": 'collection.find(query)',
        "search": 'collection.find({"$text": {"$search": query}}).limit(limit_)',
        "delete": 'collection.delete_one(query)'
    }
    """

    def __init__(self, collection="raw", use=False):
        self.collection_name = collection
        self.use = use
        if self.use:
            self.credential = MongoCredential.objects.get(status=True, use=self.use)
        elif self.collection_name:
            self.credential = MongoCredential.objects.get(
                status=True, collection=self.collection_name)
        else:
            print("Provide at least a collection name or a use for the mongo operation")
            self.credential = MongoCredential.objects.find(status=True)[0]
        self.collection = MongoClient(self.credential.uri)[self.credential.db][
            self.credential.collection]

    def push(self, query, **kwargs):
        # kwargs.items() is required here: iterating the dict directly yields only keys.
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        self.collection.update_one(query, {"$push": kwargs})

    def pull(self, query, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        self.collection.update_one(query, {"$pull": kwargs})

    def unset(self, query, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        self.collection.update_one(query, {"$unset": kwargs})

    def pop(self, query, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        self.collection.update_one(query, {"$pop": kwargs})

    def set(self, query, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        self.collection.update_one(query, {"$set": kwargs})

    def push_all(self, query, value={}, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        value.update(kwargs)
        self.collection.update_one(query, {"$push": value})

    def set_all(self, query, value={}, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        value.update(kwargs)
        self.collection.update_one(query, {"$set": value})

    def add(self, value={}, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        if kwargs:
            value.update(kwargs)
        self.collection.insert_one(value)

    def delete(self, query={}, **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        query.update(kwargs)
        self.collection.delete_one(query)

    def get(self, query={}, sort=[], **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        query.update(kwargs)
        return self.collection.find_one(query, sort=sort)

    def filter(self, query={}, sort=[], **kwargs):
        kwargs = {i.replace("__", "."): j for i, j in kwargs.items()}
        query.update(kwargs)
        return self.collection.find(query, sort=sort)

    def search(self, query, limit=10):
        return self.collection.find({"$text": {"$search": query}}).limit(limit)

    def search_index(self, search_index=[]):
        """
        e.g.: search_index = [("name", "text"), ("description", "text")]
        """
        self.collection.create_index(search_index)
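# A hedged usage sketch for the wrapper above. It assumes a MongoCredential row
# already exists for a hypothetical "articles" collection; every field name and
# document value below is illustrative only.
db = mongodb(collection="articles")

# Build a text index so search() can serve $text queries.
db.search_index([("title", "text"), ("body", "text")])

# Insert a document, then set a nested field ("meta.views") via the double-underscore syntax.
db.add({"title": "MongoDB indexes", "body": "TTL, text and geo indexes", "meta": {"views": 0}})
db.set({"title": "MongoDB indexes"}, meta__views=1)

# Append to an array field and run a text search.
db.push({"title": "MongoDB indexes"}, tags="pymongo")
for doc in db.search("indexes", limit=5):
    print(doc["title"])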
class cache_session(Session):
    def __init__(self, db_uri, dbname='tmp', colname='cache', expire_time=None,
                 disabled=False, url_only=True):
        self.col = MongoClient(db_uri)[dbname][colname]
        self.disabled = disabled
        self.url_only = url_only
        if expire_time:
            if not self.col.index_information().get('cache_time_-1'):
                self.col.create_index([("cache_time", DESCENDING)],
                                      expireAfterSeconds=expire_time)
            else:
                # Rebuild the TTL index so a new expire_time takes effect.
                self.col.drop_indexes()
                self.col.create_index([("cache_time", DESCENDING)],
                                      expireAfterSeconds=expire_time)
        super(cache_session, self).__init__()

    def request(self, method, url, params=None, data=None, headers=None,
                cookies=None, files=None, auth=None, timeout=None,
                allow_redirects=True, proxies=None, hooks=None, stream=None,
                verify=None, cert=None, json=None):
        req = (
            method.upper(), url, headers, files, data or {}, json,
            params or {}, auth, cookies, hooks,
        )
        if self.url_only:
            req1 = {
                'url': url,
            }
        else:
            req1 = {
                'method': method.upper(),
                'url': url,
                'headers': headers,
                'files': files,
                'data': data or {},
                'json': json,
                'params': params or {},
                'auth': auth,
                'cookies': cookies,
                'hooks': hooks,
            }
        req_to_str = '&'.join("%s=%s" % (k, v) for k, v in req1.items())
        key = sha1(req_to_str).hexdigest()
        cached_one = self.col.find_one({'key': key})
        if cached_one and not self.disabled:
            print 'cached'
            return cached_one['html']
        else:
            # Pass the caller's arguments through; the original passed None for
            # all of them, so the live request dropped params/data/headers.
            online_req = super(cache_session, self).request(
                method, url, params=params, data=data, headers=headers,
                cookies=cookies, files=files, auth=auth, timeout=timeout,
                allow_redirects=allow_redirects, proxies=proxies, hooks=hooks,
                stream=stream, verify=verify, cert=cert, json=json)
            html = online_req.text
            self.col.insert_one({'key': key, 'html': html,
                                 'cache_time': datetime.utcnow()})
            return html
# # from tweepy import OAuthHandler, StreamListener, Stream # # TWITTER_CONSUMER_KEY="RF4wlvvvyKucIi9H1pke46Lhk" # TWITTER_CONSUMER_SECRET="Jut4hf7m6tIn1Z4ftYPGSsACENpSFVziANEk9eQNgaE0KP3L5M" # TWITTER_ACCESS_TOKEN="848949254290788352-i8KrkbyV31L6GzvqwxcpC4TBU0FkuPd" # TWITTER_ACCESS_TOKEN_SECRET="lo2EV31yxeLJAQ2PIw9iN1CiwYDaOYGwTZwWX4jh37eRy" # # auth = OAuthHandler(consumer_key=TWITTER_CONSUMER_KEY, consumer_secret=TWITTER_CONSUMER_SECRET) # auth.set_access_token(TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET) # # # class Listener(StreamListener): # # def on_data(self, raw_data): # print(raw_data) # # # s = Stream(auth=auth,listener=Listener()) # s.filter(track=['crime']) from pymongo import MongoClient import pymongo db = MongoClient('mongodb://10.1.4.64:27017')['geodb']['geocoder'] fields =[("name", pymongo.DESCENDING),("country_name", pymongo.ASCENDING),("country_code", pymongo.ASCENDING)] db.create_index(keys=fields,unique=True) print("done")
class MongodbUtil(object):
    """
    - MYSQL_PASSWD must be set in .bashrc or .bash_profile.
    """

    def __init__(self, mongo_url, db_name, collection_name, auto_connect=False):
        """
        :param mongo_url: host, port, username, password, auth db
        :param db_name: database name
        :param collection_name: collection name
        :param auto_connect: default do not connect for multiprocessing (http://api.mongodb.com/python/current/faq.html#using-pymongo-with-multiprocessing)
        """
        self.mongo_url = mongo_url
        self.db_name = db_name
        self.collection_name = collection_name
        self.auto_connect = auto_connect
        # socketKeepAlive is accepted by older pymongo releases only; it was removed in pymongo 4.
        self.collection = MongoClient(mongo_url, socketKeepAlive=True,
                                      connect=auto_connect)[db_name][collection_name]

    def __repr__(self):
        return '%s (db_name:%s, collection_name:%s, auto_connect:%s)' % (
            StringUtil.mask_passwd_in_url(self.mongo_url), self.db_name,
            self.collection_name, self.auto_connect)

    def __str__(self):
        return self.__repr__()

    def find(self, query=None, sort=None, limit=0):
        if query is None:
            query = {}
        if sort is None:
            sort = [('_id', ASCENDING)]
        for row in self.collection.find(query, no_cursor_timeout=True).sort(sort).limit(limit):
            yield row

    def count(self, query=None):
        if query is None:
            query = {}
        # count() is deprecated and does not accept no_cursor_timeout;
        # count_documents() is the supported replacement.
        return self.collection.count_documents(query)

    def find_one(self, query: dict, limit=0) -> dict:
        # find_one() returns a single document (or None), so no cursor limit applies.
        return self.collection.find_one(query)

    def create_index(self, field_list=None, unique=False):
        if field_list is None:
            field_list = []
        for field in field_list:
            self.collection.create_index([(field, ASCENDING)], background=True, unique=unique)
        return

    def insert(self, row: dict):
        return self.collection.insert_one(row)

    def update_one(self, where_query: dict, update_content: dict, upsert=False):
        return self.collection.update_one(
            where_query,
            update_content,
            upsert=upsert
        )

    def update(self, where_query: dict, update_content: dict, upsert=False):
        return self.collection.update_many(
            where_query,
            update_content,
            upsert=upsert
        )

    def save(self, row):
        # Collection.save() is deprecated (removed in pymongo 4); kept as in the original.
        return self.collection.save(row)

    def delete(self, where_query: dict):
        result = self.collection.delete_one(where_query)
        if result:
            return result.deleted_count
        return 0

    def drop(self):
        return self.collection.drop()
import json from amazon.dynamo import ProfileTwitter from twisted.internet import defer, threads from twisted.web import server, resource from twisted.web.resource import NoResource from app import fixed, keys from pymongo import MongoClient from league.services.shared import SharedPath profile_created = 'profile_created' profile_collection = MongoClient().test.profile_collection profile_collection.create_index(profile_created, expireAfterSeconds=3600 * 24) def add_profile(tp): profile_collection.insert_one( json.loads(json.dumps(tp._data, cls=fixed.SetEncoder))) return tp._data def error_profile(err, request): print 'error profile:', err request.write('') def get_profile(twitter_id, ts): profile_dict = {keys.entity_twitter_id: twitter_id}
    responses:
      200:
        description: Creation succeeded
    """
    request_params = request.form
    if "expression" not in request_params:
        return Response(
            '"Expression" must be present as a POST parameter!',
            status=404,
            mimetype="application/json",
        )
    document = {
        "app_text": request_params["expression"],
        "indexed_date": datetime.datetime.utcnow(),
    }
    # Collection.save() is deprecated; insert_one() is the modern equivalent.
    fulltext_search.save(document)
    return Response(
        json.dumps(document, default=json_util.default),
        status=200,
        mimetype="application/json",
    )


if __name__ == "__main__":
    # create the fulltext index
    fulltext_search.create_index([("app_text", TEXT)],
                                 name="fulltextsearch_index",
                                 default_language="english")
    # starts the app in debug mode, bind on all ip's and on port 5000
    app.run(debug=True, host="0.0.0.0", port=5000)
\b /? (?!@) # not succeeded by a @, avoid matching "foo.na" in "*****@*****.**" ) )""") def clean(t): t = url.sub('', t) t = hashtag.sub('', t) t = regex.sub('\n', t) return t messages = MongoClient()['telegram_migrate']['messages'] messages.create_index([('channel', 1)]) messages.create_index([('_date', 1)]) configuration = { 'api_id': 165248, 'api_hash': '287208e1887c8e18f37d92a545a26376', 'title': 'SheyRoon', 'name': 'SheyRoon', 'public_keys': """ -----BEGIN RSA PUBLIC KEY----- MIIBCgKCAQEAwVACPi9w23mF3tBkdZz+zwrzKOaaQdr01vAbU4E1pvkfj4sqDsm6 lyDONS789sVoD/xCS9Y0hkkC3gtL1tSfTlgCMOOul9lcixlEKzwKENj1Yz/s7daS an9tqw3bfUV/nqgbhGX81v/+7RFAEd+RwFnK7a+XYl9sluzHRyVVaTTveB2GazTw Efzk2DWgkBluml8OREmvfraX3bkHZJTKX4EQSjBbbdJ2ZXIsRrYOXfaA+xayEGB+ 8hdlLmAjbCVfaigxX0CDqWeR1yFL9kwd9P0NsZRPsmoqVwMbMu7mStFai6aIhc3n Slv8kg9qv1m6XHVQY3PnEw+QQtqSIXklHwIDAQAB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: hao 2020/3/29-18:19
from pymongo import MongoClient
import os

coll = MongoClient(host="localhost", port=27017).Spider.LetMeSee
coll.drop()
# TTL index: documents expire 12 hours (43200 s) after their WriteTime value.
coll.create_index([('WriteTime', 1)], expireAfterSeconds=43200)

os.system(
    r"python D:\Fire\PycharmProject\LetMeSee\Spider\spider_mode\bilibili_spider.py"
)
os.system(
    r"python D:\Fire\PycharmProject\LetMeSee\Spider\spider_mode\baidu_spider.py"
)
os.system(
    r"python D:\Fire\PycharmProject\LetMeSee\Spider\spider_mode\zhihu_spider.py"
)
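# The TTL index above only removes documents whose WriteTime field holds an
# actual datetime value. A minimal sketch (assumed document shape; the spider
# scripts themselves are not shown) of an insert the TTL can act on:
import datetime

from pymongo import MongoClient

coll = MongoClient(host="localhost", port=27017).Spider.LetMeSee
coll.insert_one({
    "title": "example item",                   # illustrative field
    "WriteTime": datetime.datetime.utcnow(),   # compared against the TTL index in UTC
})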
class CarPipeline(object): def __init__(self): mongo_host = settings['MONGO_HOST'] mongo_port = settings['MONGO_PORT'] mongo_db = settings['MONGO_DB'] self.clct_brand = MongoClient(mongo_host, mongo_port)[mongo_db]['brand'] self.clct_sub_brand = MongoClient(mongo_host, mongo_port)[mongo_db]['sub_brand'] self.clct_car_train = MongoClient(mongo_host, mongo_port)[mongo_db]['car_train'] self.clct_car = MongoClient(mongo_host, mongo_port)[mongo_db]['car'] self.clct_car_config = MongoClient(mongo_host, mongo_port)[mongo_db]['car_config'] # self.clct_brand.drop() # self.clct_sub_brand.drop() # self.clct_car_train.drop() # self.clct_car.drop() self.clct_car_config.drop() # # self.clct_brand.create_index('id') # self.clct_sub_brand.create_index('id') # self.clct_car_train.create_index('id') # self.clct_car.create_index('id') self.clct_car_config.create_index('id') def process_item(self, item, spider): if spider.name == 'autohome_brand': if isinstance(item, BrandItem): self.save_brand(item, spider) elif isinstance(item, SubBrandItem): self.save_sub_brand(item, spider) elif isinstance(item, CarTrainItem): self.save_cartrain(item, spider) else: warnings.warn('can not deal %s : %s' % (item.__class__.__name__, str(item))) if spider.name == 'autohome_car': if isinstance(item, CarItem): self.save_car(item, spider) else: warnings.warn('can not deal %s : %s' % (item.__class__.__name__, str(item))) if spider.name == 'autohome_car_config': if isinstance(item, CarConfigItem): self.save_car_config(item, spider) return item def save_brand(self, item, spider): self.clct_brand.insert_one(dict(item)) def save_sub_brand(self, item, spider): self.clct_sub_brand.insert_one(dict(item)) def save_cartrain(self, item, spider): self.clct_car_train.insert_one(dict(item)) def save_car(self, item, spider): self.clct_car.insert_one(dict(item)) def save_car_config(self, item, spider): self.clct_car_config.insert_one(dict(item))
parser.add_option("-l", "--log", action="store", dest="log", default="log.txt", help="Input log file for profiler") parser.add_option("-f", "--format", action="store", dest="format", default="combined", help="Format of the input log") parser.add_option("-t", "--threads", action="store", dest="threads", default="12", help="Amout of threats that can be used") parser.add_option("-x", "--lines", action="store", dest="linesPerThread", default="250", help="Max lines per thread") parser.add_option("-p", "--procent", action="store", dest="procentToParse", default="100", help="Set how much of the logfile to parse") parser.add_option("-s", "--start", action="store", dest="startToParse", default="0", help="Set line number to start parsing from") parser.add_option("-d", "--db", action="store", dest="dbName", default="", help="Set collection to add parsed lines") options, args = parser.parse_args() ###################### #### Init #### initTime = str(datetime.datetime.now().hour) + "_" + str(datetime.datetime.now().minute) + "_" + str(datetime.datetime.now().second) MongoDB = MongoClient().FormattedLogs[options.dbName if options.dbName is not "" else options.log + ' - ' + initTime] startTime = datetime.datetime.now() MongoDB.create_index('index', background=True) ############## #### Determening lines #### with open(options.log) as f: num_lines = sum(1 for line in f) linesToProcess = (num_lines * int(options.procentToParse)) / 100 startIndex = int(options.startToParse) endIndex = num_lines if startIndex + linesToProcess > num_lines else startIndex + linesToProcess print 'Lines from {} till {} will be processed'.format(startIndex, endIndex) ###########################
def main(pid, weeks, idfilter, force_update, filter_user): """ from tracker import * pid="P699" idfilter="" weeks=52 force_update=False filter_user="******" """ coll_name = pid + "_" + idfilter if idfilter else pid save_name = coll_name + "_" + str( datetime.date.today()) + "_{}weeks".format(weeks) + ".xls" writer = pd.ExcelWriter(save_name) coll = MongoClient().wikidata[coll_name] coll.create_index("id") idfilter = [(k.split(":")[0], k.split(":")[1]) for k in idfilter.split(";")] if idfilter else [] extid_qid = id_mapper(pid, idfilter) qid_extid = {v: k for k, v in extid_qid.items()} qids = extid_qid.values() """ # what are the date extents of these items? # get the most recent timestamp and figure out how many weeks ago it was # warning, only checks the most recent timestamp! # as in, if you request one week, and then one year, it won't get anything before one week ago # unless force_update=True weeks_to_dl = weeks if not force_update: timestamps = set(x['timestamp'] for x in coll.find({'id': {'$in': list(qids)}}, {'timestamp': True})) if timestamps: if datetime.date.today() == max(timestamps).date(): print("most recent revision is today, skipping") weeks_to_dl = 0 else: weeks_to_dl = math.ceil(abs((max(timestamps) - datetime.datetime.now()).days / 7)) + 1 print("Most recent revision stored: {}".format(max(timestamps))) print("Getting revisions from the past {} weeks".format(weeks_to_dl)) need_revisions = get_revision_ids_needed(coll, qids, weeks=weeks_to_dl) print("Downloading revisions") download_revisions(coll, need_revisions, pid, qid_extid) print("Processing changes in the past {} weeks".format(weeks)) changes = process_revisions(coll, qids, weeks) for change in changes: change.pretty_refs() Change.lookupLabels(changes) df = pd.DataFrame([x.to_dict() for x in changes]) # reorder columns if not df.empty: df = df[["revid", "url", "timestamp", "user", "change_type", "comment", "has_ref", "merge", "metadata", "qid", "qid_label", "pid", "pid_label", "value", "value_label", "ref_str"]] df.to_excel(writer, sheet_name="changes") if not df.empty and filter_user: df = df.query("user != @filter_user") if not df.empty: df = df.query("user != 'KrBot'") df.to_excel(writer, sheet_name="changes_filtered") print("Processing label changes in the past {} weeks".format(weeks)) lda_changes = process_lda_revisions(coll, qids, weeks) Change.lookupLabels(lda_changes) lda_df = pd.DataFrame([x.to_dict() for x in lda_changes]) if not lda_df.empty: lda_df = lda_df[["revid", "url", "timestamp", "user", "change_type", "comment", "merge", "qid", "qid_label", "value"]] lda_df.to_excel(writer, sheet_name="labels") """ print("Getting redirects") redirect_df = get_merges(qids, weeks) redirect_df['history_url'] = redirect_df.page_title.apply( lambda x: "https://www.wikidata.org/w/index.php?title={}&action=history".format( x)) redirect_df['url'] = redirect_df.page_latest.apply( lambda x: "https://www.wikidata.org/w/index.php?diff={}".format(x)) redirect_df.to_excel(writer, sheet_name="redirects") writer.save()
$ref: '#/definitions/Place' type: array """ max_distance = int(request.args.get('max_distance', 10000)) limit = int(request.args.get('limit', 10)) cursor = places.find({ 'location': { '$near': { '$geometry': { 'type': 'Point', 'coordinates': [float(lng), float(lat)] }, '$maxDistance': max_distance } } }).limit(limit) extracted = [{ 'name': d['name'], 'lat': d['location']['coordinates'][1], 'lng': d['location']['coordinates'][0] } for d in cursor] return Response(json.dumps(extracted, default=json_util.default), status=200, mimetype='application/json') if __name__ == "__main__": port = int(os.environ.get('PORT', 5000)) places.create_index([('location', GEOSPHERE)], name='location_index') app.run(debug=True, host='0.0.0.0', port=port)
class MarketWatch(object): category = "marketwatch" def __init__(self): self.base_url = "http://www.marketwatch.com/investing/Stock/{}/financials" self.coll_base = MongoClient(HOST, PORT)[DB_MARKET][COLL_BASE] self.coll_values = MongoClient(HOST, PORT)[DB_MARKET][COLL_VALUES] self.coll_items = MongoClient(HOST, PORT)[DB_MARKET][COLL_ITEMS] self.coll = MongoClient(HOST, PORT)[DB][COLLECTION] self.type = ["Annual", "Quarter"] self.keys = ["Income Statement", "Balance Sheet", "Cash Flow Statement"] self.url_keys = ["INCOME_ANNUAL_URL", "BALANCE_ANNUAL_URL", "CASH_ANNUAL_URL", "INCOME_QUARTER_URL", "BALANCE_QUARTER_URL","CASH_QUARTER_URL"] self.logger = logger self.user_agent = choice(USER_AGENT) self.coll_base.create_index("ticker", unique=True) self.coll_items.create_index([("md5", pymongo.ASCENDING)], unique=True) self.coll_values.create_index([("md5_values", pymongo.ASCENDING)], unique=True) self.proxies = {"http": PROXIES} def urls_ticker(self, ticker): try: one_tick_urls = [marketwatch_config[url].format(ticker) for url in self.url_keys] return one_tick_urls except Exception as e: self.logger.info("Get conf error: type<{}>, msg<{}>".format(e.__class__, e)) def ticker_from_db(self): coll = self.coll try: tickers = coll.find({"code": {"$in": [re.compile("_NY_EQ"), re.compile("_NQ_EQ")]}}, {"tick": 1, "_id": 0}) code_ticker = [ticker["tick"] for ticker in tickers] return code_ticker except Exception as e: self.logger.info("Get ticker from mongodb error: type<{}>, msg<{}>".format(e.__class__, e)) return "" def fetch(self, ticker, number): url = self.urls_ticker(ticker)[number] html = requests.session() html.headers.update({"User-Agent": self.user_agent}) html.proxies = self.proxies try: response = html.get(url) if response.status_code == requests.codes.ok and response.url == url: return response.content except Exception as e: self.logger.info("Get html error: type<{}>, msg<{}>".format(e.__class__, e)) pass def get_md5(self, dict_item, number=None): md = hashlib.md5() if number: temp_item = ("_".join("%s:%s" % (key, value) for key, value in dict_item.items() if key not in ("ct")) + "_" + str(number)).encode("utf-8") md.update(temp_item) else: temp_item = "_".join("%s:%s" % (key, value) for key, value in dict_item.items() if key not in ("ct")).encode("utf-8") md.update(temp_item) return md.hexdigest() def parse(self, key, type=None): """ :param key: ticker :param type: year or quarter :return: """ type=None for number in range(6): if number < 3: type = self.type[0] else: type = self.type[1] response = self.fetch(key, number) if isinstance(response, str): selector = etree.HTML(response) else: return temp_factor = self.keys[number % 3] # 公司代码和名称 market_and_ticker = selector.xpath('//div[@id="instrumentheader"]')[0] name = market_and_ticker.xpath("h1")[0].xpath("string()").strip("\r\n").strip() info = market_and_ticker.xpath('p')[0].xpath("string()").strip("\r\n").strip() market = info.split(":")[0] base_info = { "name": name, "market": market, "ticker": key, "key": key, "ct": datetime.now() } last_base = self.coll_base.find_one({"ticker": key, "name": name, "market": market}) if not last_base: try: self.coll_base.insert(base_info) except errors.DuplicateKeyError as e: self.logger.info("Get pymongo error1: e.code<{}>,e.details<{}>".format(e.code, e.details)) pass # 获取表格title信息,包括财年,货币类型,货币单位 if not selector.xpath("//table"): pass else: detail = "" fy = "" unit = "" currency = "" content_topRow = selector.xpath('//div/table[@class]//tr[@class="topRow"][1]')[0] content_topRow_one = 
content_topRow.xpath("th[1]/text()") if type == "Annual": detail = content_topRow_one[0] if detail != " ": if "values" in detail: pattern = re.compile(r"Fiscal year is (.*). All values (.*) (.*).") fy, currency, unit = pattern.findall(detail)[0] else: pattern = re.compile(r"Fiscal year is (.*).") fy = pattern.findall(detail)[0] currency = None, unit = None else: fy = None currency = None unit = None if type == "Quarter": detail = content_topRow_one[0] if detail != ' ': fy = None pattern = re.compile(r"All values (.*) (.*).") currency, unit = pattern.findall(detail)[0] else: fy = None currency = None unit = None years_or_dates_temp = content_topRow.xpath("th[position()>1][position()<6]") years_or_dates = [one.text for one in years_or_dates_temp] # 获取items信息,提取层级关系 items_list = [] content_firstColumn = selector.xpath("//div/table[@class]//tbody/tr/td[1]") # 第一列 for one in content_firstColumn: items = {} try: if one.xpath("a")!=[]: items["item"] = one.xpath("a")[0].tail.strip("\r\t").strip() items["serie"] = 1 items["level"] = "L1" items["parent"] = None items["type"] = type items_list.append(items) else: if "mainRow" in one.getparent().get("class"): items["item"] = one.text items["serie"] = 1 items["level"] = "L1" items["parent"] = None items["type"] = type items_list.append(items) if "totalRow" in one.getparent().get("class"): items["item"] = one.text items["serie"] = 1 items["level"] = "L1" items["parent"] = None items["type"] = type items_list.append(items) if "partialSum" in one.getparent().get("class"): items["item"] = one.text items["serie"] = 1 items["level"] = "L1" items["parent"] = None items["type"] = type items_list.append(items) if "childRow" in one.getparent().get("class"): items["item"] = one.text items["serie"] = 2 items["level"] = "L2" items["parent"] = None items["type"] = type items_list.append(items) elif "rowLevel" in one.getparent().get("class"): tag = one.getparent().get("class") pattern = r"rowLevel-(\d+)" number = re.findall(pattern, tag)[0] items["item"] = one.text items["serie"] = int(number) items["level"] = "L" + str(number) items["parent"] = None items["type"] = type items_list.append(items) except Exception as e: self.logger.info("Get xpath error: type<{}>, msg<{}>".format(e.__class__, e)) length = len(items_list) new_items_list = [] for one in range(0, length, 1): try: if items_list[one]["serie"] == 1: new_items_list.append(items_list[one]) else: j = one while True: j -= 1 if (items_list[one]["serie"] - items_list[j]["serie"]) == 1: items_list[one]["parent"] = items_list[j]["item"] new_items_list.append(items_list[one]) break except Exception as e: self.logger.info("Get parent field error: type<{}>, msg<{}>".format(e.__class__, e)) Ratios_items = [item["item"] for item in new_items_list] # pprint (Ratios_items) # print len(Ratios_items) count = 0 for item in new_items_list: item_info = { "item": Ratios_items[count],#item["item"], "serie": item["serie"], "parent": item["parent"], "type": type, "level": item["level"], "code": None, "ct": datetime.now(), "factor": temp_factor, "ticker": key, } count += 1 # print item_info md5_id = self.get_md5(item_info, count) item_info["md5"] = md5_id try: last_one = self.coll_items.find_one({"md5": md5_id}) if not last_one: self.coll_items.insert(item_info) if item["parent"] is not None: _id = self.coll_items.find_one({"item": item["parent"]})["_id"] self.coll_items.update_one({"parent": item["parent"]}, {"$set": {"parent": _id}}) except errors.DuplicateKeyError as e: self.logger.info("Get pymongo error2: e.code<{}>, 
e.datails<{}>".format(e.code, e.details)) # values 信息 content_columm = selector.xpath("//div/table[@class]//tbody/tr/td[position()>1][position()<6]") content_values = [] for one in content_columm: value = one.xpath('text()|span/text()')[0].lower() if "(" in value: value = "-" + (value[1:-1]) if "m" in value: value = int(round(float(value[:-1]), 2) * 10 ** 6) elif "b" in value: value = int(round(float(value[:-1]), 2) * 10 ** 9) elif "t" in value: value = int(round(float(value[:-1]), 2) * 10 ** 12) elif value == "-": value = 0 elif "%" in value: pass elif "," in value: value = "".join(value.split(",")) else: try: value = int(value) except ValueError: value = float(value) finally: pass content_values.append(value) for i in range(len(years_or_dates)): for j in range(len(Ratios_items)): temp = content_values[i:len(content_values):len(years_or_dates)][j] # print temp values_info = { "key": key, # 股票简称 : FISV "item": Ratios_items[j], # 属性:科目 "value": temp, # 值 "type": type, # 年度 or 季度 "year": None, # 年份years_or_dates[i] "date": None, # 日期 "fy": fy, # 区间 "currency": currency, # 货币单位 "unit": unit, # 单位 "detail": detail, # "ct": datetime.now(), "factor": temp_factor } if type == "Annual": values_info["year"] = years_or_dates[i] else: values_info["date"] = years_or_dates[i] md5_values = self.get_md5(values_info, j) values_info["md5_values"] = md5_values # print values_info last_data = self.coll_values.find_one({"md5_values": md5_values}) if not last_data: try: self.coll_values.insert(values_info) except pymongo.errors.DuplicateKeyError as e: self.logger.info("Get pymongo error3: e.code<{}>, e.datails<{}>".format(e.code, e.details)) def main(self): thread_num = multiprocessing.cpu_count() code_ticker = self.ticker_from_db()#[0:100] type = self.type all_url = [self.urls_ticker(ticker) for ticker in code_ticker] pool = ThreadPool(thread_num) for i in range(len(code_ticker)): pool.apply_async(self.parse, args=(code_ticker[i],)) self.logger.info("MarketWatch Crawl the ticker is <{},{}>, total tickers<{}>".format(code_ticker[i], i, len(code_ticker))) wait_time = random() time.sleep(wait_time) pool.close() pool.join()
from pymongo import MongoClient # from annotator.keyword_annotator import KeywordAnnotator # from annotator.geoname_annotator import GeonameAnnotator if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument("-u", "--mongo_url", default="localhost", dest="u") parser.add_argument("-d", "--mongo_db", default="pmc", dest="d") parser.add_argument("-c", "--mongo_collection", default="articlesubset", dest="c") args = parser.parse_args() print("Making connection.") articles = MongoClient(args.u)[args.d][args.c] print("Writing index field...") articles.update_many({'keywords.disease-ontology': { '$type': 'object' }}, {'$set': { 'index.infectious': 1 }}) print("Creating index...") articles.create_index("index.infectious") print("Done.")
import json
from pymongo import MongoClient

with open("Source_Material/poems_etc/poems_etc_parsed/the_sonnets.json", "r") as s:
    sonnets = json.load(s)

client = MongoClient()
# Reuse the client above so client.close() actually closes the connection in use.
sonnet_collection = client.dustball_db.sonnets
sonnet_collection.drop()


def add_author(s):
    s["author_first_name"] = "william"
    s["author_last_name"] = "shakespeare"
    return s


prepped_sonnets = list(map(add_author, sonnets))
sonnet_collection.insert_many(prepped_sonnets)
sonnet_collection.create_index([("text", "text")])
client.close()
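# With the text index in place, the sonnets can be queried with $text. A small
# example; the search term and score projection are illustrative, not part of
# the original script.
from pymongo import MongoClient

sonnet_collection = MongoClient().dustball_db.sonnets

cursor = sonnet_collection.find(
    {"$text": {"$search": "summer"}},
    {"score": {"$meta": "textScore"}},
).sort([("score", {"$meta": "textScore"})]).limit(5)

for doc in cursor:
    print(doc.get("author_last_name"), doc.get("score"))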
200: description: List of results schema: $ref: '#/definitions/Result' """ results = fulltext_search.find({ '$text': { '$search': searched_expression } }, { 'score': { '$meta': 'textScore' } }).sort([('score', { '$meta': 'textScore' })]).limit(10) results = [{ 'text': result['app_text'], 'date': result['indexed_date'].isoformat() } for result in results] return Response(json.dumps(list(results), default=json_util.default), status=200, mimetype='application/json') if __name__ == "__main__": #create the fulltext index fulltext_search.create_index([('app_text', TEXT)], name='fulltextsearch_index', default_language='english') app.run(debug=True, host='0.0.0.0', port=5000)
from pymongo import MongoClient if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() parser.add_argument( "-u", "--mongo_url", default="localhost", dest = "u" ) parser.add_argument( "-d", "--mongo_db", default="pmc", dest = "d" ) parser.add_argument( "-c", "--mongo_collection", default="articlesubset", dest = "c" ) args = parser.parse_args() print("Making connection.") articles = MongoClient(args.u)[args.d][args.c] print("Writing index field...") articles.update_many({'meta.article-type': 'research-article'}, {'$set': {'index.research': 1}}) print("Creating index...") articles.create_index("index.research") print("Done.")
class Scraper: nlp = spacy.load('en_core_web_sm', disable=['tagger', 'ner']) processor = MaterialsTextProcessor() def __init__(self, classifiers, database='abstracts', collection='all', save_all=False, gen_tag='food science'): """ Initializes Scraper class :param classifiers: model to determine relevance of abstract :param database: defaults to 'abstracts', database to store abstracts in :param collection: defaults to 'all', collection to store abstracts in :param save_all: defaults to False, Bool flag to save all articles from query :param gen_tag: defaults to 'food science', name of tag to apply to all articles (required only if save_all is True) """ self._classifiers = classifiers self._collection = MongoClient(DATABASE_URL)[database][collection] self._save = save_all self._gen_tag = gen_tag self._gen_new = 0 self._gen_total = 0 # create collection indices self._collection.create_index('doi', name='doi', unique=True, sparse=True) self._collection.create_index('uid', name='uid', unique=True, sparse=True) self._collection.create_index('pmc', name='pmc', unique=True, sparse=True) self._collection.create_index('tags', name='tags') self._collection.create_index('database', name='database') def _get_date(self, date): """ Converts date into datetime object :param date: date formatted 'YYYY-MM-DD' """ if not date: return None date_array = date.split('-') return datetime.datetime(int(date_array[0]), int(date_array[1]), int(date_array[2])) def _save_all(self, articles): """ Stores all articles from database query (regardless of classifier result) under general tag :param articles: list of article objects to add to database :param doi: Bool flag for whether stored IDs are DOI """ self._gen_total += len(articles) # creates request to store article with corresponding tag requests = [] for article in articles: # creates document to insert by filtering out fields that are None doc = {k: v for k, v in article.items() if v is not None} doi = doc.get('doi') uid = doc.get('uid') pmc = doc.get('pmc') # sets either doi, uid, or pmc as the only id in that # preference order if doi: filter = {'doi': doi} doc.pop('uid', None) doc.pop('pmc', None) elif uid: filter = {'uid': uid} doc.pop('doi', None) doc.pop('pmc', None) else: filter = {'pmc': pmc} doc.pop('doi', None) doc.pop('uid', None) # if article is marked as relevant, inserts new document if it # does not exist and adds to tag requests.append( UpdateOne(filter, { '$setOnInsert': doc, '$addToSet': { 'tags': self._gen_tag } }, upsert=True)) # updates database if requests: mongo = self._collection.bulk_write(requests, ordered=False) self._gen_new += mongo.upserted_count + mongo.modified_count if mongo else 0 def _store(self, articles, abstracts): """ Classifies articles based on processed abstracts and stores in database if relevant :param articles: list of article objects to add to database :param abstracts: list of processed abstracts to be checked against classifier """ for classifier in self._classifiers: classifier.total += len(articles) # uses classifier to determine if relevant predictions = classifier.predict(abstracts) # creates request to store article with corresponding tag requests = [] for i, article in enumerate(articles): if predictions[i]: # creates document to insert by filtering out fields that are None doc = {k: v for k, v in article.items() if v is not None} doi = doc.get('doi') uid = doc.get('uid') pmc = doc.get('pmc') paperid = doc.get('paperid') # unique s2orc paper id # sets either doi, uid, or pmc as the only id in that # preference 
order if doi: filter = {'doi': doi} doc.pop('uid', None) doc.pop('pmc', None) doc.pop('paperid', None) elif uid: filter = {'uid': uid} doc.pop('doi', None) doc.pop('pmc', None) doc.pop('paperid', None) elif pmc: filter = {'pmc': pmc} doc.pop('doi', None) doc.pop('uid', None) doc.pop('paperid', None) else: filter = {'paperid': paperid} doc.pop('doi', None) doc.pop('uid', None) doc.pop('pmc', None) # if article is marked as relevant, inserts new document if it # does not exist and adds to tag requests.append( UpdateOne(filter, { '$setOnInsert': doc, '$addToSet': { 'tags': classifier.tag } }, upsert=True)) # ignore irrelevant articles, but keep track of their number else: classifier.irrelevant += 1 # updates database if requests: mongo = self._collection.bulk_write(requests, ordered=False) classifier.relevant += mongo.upserted_count + mongo.modified_count if mongo else 0 # if flag is marked True, store all articles from query to database if self._save: self._save_all(articles)
from pymongo import MongoClient
import sys

# First argument is mongodb_url
# IP_ADDR:PORT
mongodb_url = sys.argv[1]

id_doc = {"type": "object_id_file", "object_id": 0}
collection = MongoClient(mongodb_url).stats.swift
# Only seed the counter document once; the unique index guards against duplicates.
if collection.find_one({"type": "object_id_file"}) is None:
    collection.insert_one(id_doc)
    collection.create_index("type", unique=True)
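# The counter document seeded above is typically consumed by incrementing it
# atomically. A hedged sketch of how a caller might claim the next id; the
# function name is an assumption, not part of the original script.
from pymongo import MongoClient
from pymongo.collection import ReturnDocument


def next_object_id(mongodb_url):
    collection = MongoClient(mongodb_url).stats.swift
    doc = collection.find_one_and_update(
        {"type": "object_id_file"},
        {"$inc": {"object_id": 1}},
        return_document=ReturnDocument.AFTER,
    )
    return doc["object_id"]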
"type": "Point", "coordinates": [float(lng), float(lat)], }, "$maxDistance": max_distance, } } } ).limit(limit) extracted = [ { "name": d["name"], "lat": d["location"]["coordinates"][1], "lng": d["location"]["coordinates"][0], } for d in cursor ] return Response( json.dumps(extracted, default=json_util.default), status=200, mimetype="application/json", ) if __name__ == "__main__": # cretes a GEOSHPHERE (2dsphere in MongoDb: https://docs.mongodb.com/manual/core/2dsphere/) index # named "location_index" on "location" field, it's used to search by distance places.create_index([("location", GEOSPHERE)], name="location_index") # starts the app in debug mode, bind on all ip's and on port 5000 app.run(debug=True, host="0.0.0.0", port=5000)
class cache_session(Session):
    def __init__(self, db_uri, dbname='tmp', colname='cache', expire_time=None,
                 disabled=False):
        self.col = MongoClient(db_uri)[dbname][colname]
        self.disabled = disabled
        if expire_time:
            if not self.col.index_information().get('cache_time_-1'):
                self.col.create_index([("cache_time", DESCENDING)],
                                      expireAfterSeconds=expire_time)
            else:
                # Rebuild the TTL index so a new expire_time takes effect.
                self.col.drop_indexes()
                self.col.create_index([("cache_time", DESCENDING)],
                                      expireAfterSeconds=expire_time)
        super(cache_session, self).__init__()

    def request(self, method, url, params=None, data=None, headers=None,
                cookies=None, files=None, auth=None, timeout=None,
                allow_redirects=True, proxies=None, hooks=None, stream=None,
                verify=None, cert=None, json=None):
        req = (
            method.upper(), url, headers, files, data or {}, json,
            params or {}, auth, cookies, hooks,
        )
        req1 = {
            'method': method.upper(),
            'url': url,
            'headers': headers,
            'files': files,
            'data': data or {},
            'json': json,
            'params': params or {},
            'auth': auth,
            'cookies': cookies,
            'hooks': hooks,
        }
        req_to_str = '&'.join("%s=%s" % (k, v) for k, v in req1.items())
        key = sha1(req_to_str).hexdigest()
        cached_one = self.col.find_one({'key': key})
        if cached_one and not self.disabled:
            print 'cached'
            return cached_one['html']
        else:
            # Pass the caller's arguments through; the original passed None for
            # all of them, so the live request dropped params/data/headers.
            online_req = super(cache_session, self).request(
                method, url, params=params, data=data, headers=headers,
                cookies=cookies, files=files, auth=auth, timeout=timeout,
                allow_redirects=allow_redirects, proxies=proxies, hooks=hooks,
                stream=stream, verify=verify, cert=cert, json=json)
            html = online_req.text
            self.col.insert_one({'key': key, 'html': html,
                                 'cache_time': datetime.utcnow()})
            return html
def main(): if len(sys.argv) > 1: if sys.argv[1] == 'runserver': from runner import initRPC # Start server from here initRPC() elif sys.argv[1] == 'start_component': from core.manager.execute_start_component import start_component # Create component start_component(sys.argv[2]) elif sys.argv[1] == 'shutdown': from jsonrpclib import Server from socket import error from config.settings import CORE_PORT try: conn = Server('http://localhost:{0}'.format(CORE_PORT)) conn.main.stop() except error: print 'Core Services shutdown \t\t\t\t\t\t[OK]' elif sys.argv[1] == 'set_index': second_cmd = sys.argv[2] from pymongo import MongoClient from pymongo import ASCENDING from config.settings import MONGO_HOST_LOCAL from config.settings import MONGO_PORT_LOCAL url = "mongodb://{0}".format(MONGO_HOST_LOCAL) url += ":{0}".format(MONGO_PORT_LOCAL) cursor = MongoClient(url)['YouTube']['refined_data'] if second_cmd == 'show': print cursor.index_information() elif second_cmd == 'write': cursor.create_index([('id', ASCENDING)], unique=True) elif sys.argv[1] == 'logs': from os import system from config.settings import CORE_ID system('tail -f /var/log/core/{0}/*'.format(CORE_ID)) elif sys.argv[1] == 'memory': from jsonrpclib import Server from socket import error from config.settings import CORE_PORT try: key = sys.argv[2] conn = Server('http://localhost:{0}'.format(CORE_PORT)) data = conn.main.access_shared_memory(key) pretty(data) except error: print 'Core Services shutdown \t\t\t\t\t\t[OK]' elif sys.argv[1] == 'generate_settings': from shutil import copyfile from config.settings import BASE_DIR raw = BASE_DIR + '/config/settings_local.txt' gen = BASE_DIR + '/config/settings_local.py' copyfile(raw, gen) elif sys.argv[1] == 'plugin': from jsonrpclib import Server from socket import error from config.settings import CORE_PORT try: conn = Server('http://localhost:{0}'.format(CORE_PORT)) data = conn.listMethods() pretty(dumps(data)) except error: print 'Core Services shutdown \t\t\t\t\t\t[OK]' elif sys.argv[1] == 'help': if len(sys.argv) == 3: from os import system from jsonrpclib import Server from socket import error from config.settings import CORE_PORT try: conn = Server('http://localhost:{0}'.format(CORE_PORT)) data = conn.methodHelp(sys.argv[2]) system('echo {}'.format(data)) except error: print 'Core Services shutdown \t\t\t\t\t\t[OK]' elif len(sys.argv) == 2: print usage
ponti = MongoClient().hamradio.ponti
ponti.drop()  # rebuild the collection from scratch

with open('pontixls.csv', 'rb') as f:
    csvfile = reader(f)
    for row in csvfile:
        doc = dict(nome=row[0].strip(),
                   frequenza=row[1].strip(),
                   shift=row[2].strip(),
                   tono=row[3].strip(),
                   regione=row[4].strip().lower(),
                   provincia=row[5].strip().upper(),
                   localita=row[6].strip(),
                   gruppo=row[7].strip(),
                   identificatore=row[8].strip(),
                   traslatore=row[9].strip(),
                   locator=row[10].strip(),
                   gestore=row[15].strip())
        if doc['locator'] != '':
            # compute the approximate coordinates (centre of the locator square)
            if is_valid_locator(doc['locator']):
                location = convert_locator(doc['locator'])
                doc['geoloc'] = [get_longitude(location), get_latitude(location)]
        ponti.insert_one(doc)

ponti.create_index('nome')
ponti.create_index('regione')
ponti.create_index('provincia')
ponti.create_index([('geoloc', GEOSPHERE)])