def insertUrlList(db: couchdb.Database, urlList):
    logging.info("inserting url list...")
    for url in urlList:
        data = {'_id': url, 'type': 'url', 'visited': False}
        db.save(data)
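# Usage sketch for insertUrlList above; the server URL and database name are
# assumptions. Because each URL becomes the document _id, re-inserting an
# existing URL raises couchdb.http.ResourceConflict.
import couchdb
import logging

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    db = couchdb.Database('http://localhost:5984/crawler')  # hypothetical DB
    insertUrlList(db, ['https://example.org', 'https://example.com'])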
class EdgeDataBridge(object):
    """Edge Bridge"""

    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')
        self.retrievers_params = self.config_get('retrievers_params')
        self.client = TendersClient(host_url=self.api_host,
                                    api_version=self.api_version, key='')
        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('public_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))

    def config_get(self, name):
        return self.config.get('main').get(name)

    def get_tenders_list(self):
        for item in get_tenders(host=self.api_host,
                                version=self.api_version,
                                key='',
                                extra_params={'mode': '_all_'},
                                retrievers_params=self.retrievers_params):
            yield (item["id"], item["dateModified"])

    def save_tender_in_db(self, tender_id, date_modified):
        tender_doc = self.db.get(tender_id)
        if tender_doc:
            if tender_doc['dateModified'] == date_modified:
                return
        tender = self.client.get_tender(tender_id).get('data')
        if tender:
            tender['_id'] = tender_id
            tender['doc_type'] = 'Tender'
            if tender_doc:
                tender['_rev'] = tender_doc['_rev']
                logger.info('Update tender {}'.format(tender_id))
            else:
                logger.info('Save tender {}'.format(tender_id))
            try:
                self.db.save(tender)
            except Exception as e:
                logger.info('Saving tender {} failed with error {}'.format(tender_id, e.message),
                            extra={'MESSAGE_ID': 'edge_bridge_fail_save_in_db'})
        else:
            logger.info('Tender {} not found'.format(tender_id))

    def run(self):
        logger.info('Start Edge Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': 'edge_bridge__data_sync'})
        for tender_id, date_modified in self.get_tenders_list():
            self.save_tender_in_db(tender_id, date_modified)
class CouchdbPuller(OutputModule):

    def __init__(self, actor_config, couchdb_url, payload=None,
                 selection="data", bulk=100, parallel_streams=1,
                 native_events=False, **kw):
        OutputModule.__init__(self, actor_config)
        self.pool.createQueue("inbox")
        self.registerConsumer(self.consume, "inbox")
        self.couchdb = Database(couchdb_url)
        self._bulk_size = bulk
        self._bulk = {}

    def __save(self):
        self.logging.debug("Saving: {} docs".format(len(self._bulk)))
        try:
            response = self.couchdb.update(list(self._bulk.values()))
            for ok, doc_id, rest in response:
                if ok:
                    self.logging.info("Saved {}".format(doc_id))
                else:
                    self.logging.error(
                        "Error on save bulk. Type {}, message {}, doc {}".format(
                            rest, getattr(rest, 'message', ''), doc_id))
        except Exception as e:
            self.logging.error("Uncaught error {} on save bulk".format(e))
        finally:
            self._bulk = {}
            self.logging.debug("Cleaned bulk")
        return False

    def consume(self, event):
        data = self.encode(self.getDataToSubmit(event))
        if not isinstance(data, dict):
            try:
                data = loads(data)
            except ValueError:
                self.logging.error("Unable to parse data from raw string. Skipping")
                return
        doc_id = data.get('id', data.get('_id'))
        if doc_id:
            data['_id'] = data['id'] = doc_id
            if doc_id in self.couchdb:
                rev = self.couchdb.get(doc_id).rev
                data['_rev'] = rev
                self.logging.debug("Update revision in data {} to {}".format(doc_id, rev))
        self._bulk[data.get('_id', uuid4().hex)] = data
        self.logging.debug("Added {} to bulk queue. Size {}".format(doc_id, len(self._bulk)))
        if len(self._bulk) >= self._bulk_size:
            g = spawn(self.__save)
            g.join()
class EdgeDataBridge(object):
    """Edge Bridge"""

    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')
        self.client = TendersClient(host_url=self.api_host,
                                    api_version=self.api_version, key='')
        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('public_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))

    def config_get(self, name):
        return self.config.get('main').get(name)

    def get_tenders_list(self):
        for item in get_tenders(host=self.api_host,
                                version=self.api_version,
                                key='',
                                extra_params={'mode': '_all_'}):
            yield (item["id"], item["dateModified"])

    def save_tender_in_db(self, tender_id, date_modified):
        tender_doc = self.db.get(tender_id)
        if tender_doc:
            if tender_doc['dateModified'] == date_modified:
                return
        tender = self.client.get_tender(tender_id).get('data')
        if tender:
            tender['_id'] = tender_id
            tender['doc_type'] = 'Tender'
            if tender_doc:
                tender['_rev'] = tender_doc['_rev']
                logger.info('Update tender {}'.format(tender_id))
            else:
                logger.info('Save tender {}'.format(tender_id))
            try:
                self.db.save(tender)
            except Exception as e:
                logger.info('Saving tender {} failed with error {}'.format(tender_id, e.message),
                            extra={'MESSAGE_ID': 'edge_bridge_fail_save_in_db'})
        else:
            logger.info('Tender {} not found'.format(tender_id))

    def run(self):
        logger.info('Start Edge Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': 'edge_bridge__data_sync'})
        for tender_id, date_modified in self.get_tenders_list():
            self.save_tender_in_db(tender_id, date_modified)
def authenticate(request, id):
    result = ""
    docs = Database("https://*****:*****@wazza.cloudant.com/api_keys")
    sep = "_"
    parts = id.split(sep)
    api_key_doc = docs.get(str(parts[1]))
    if api_key_doc is None:
        result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Invalid API Key"></error>'
        return HttpResponse(result)
    else:
        return HttpResponse(lookup(str(parts[0])))
def processEntries(db: couchdb.Database):
    totalSentences = [x for x in db.iterview('sentences/sentences_count', 10)][0].value
    sentenceCount = 1
    for entry in db.iterview('sentences/sentences', 100):
        if sentenceCount % 1000 == 0:
            printProgress(sentenceCount, totalSentences)
        for word in sentences.splitInWords(entry.value['sentence']):
            if word and word != "":
                updateWordDocument(db, word, entry.value)
        sentenceCount = sentenceCount + 1
    print("")  # to clear printProgress
class CouchdbOutput(OutputModule):

    def __init__(self, actor_config, couchdb_url, payload=None,
                 selection="data", parallel_streams=1,
                 native_events=False, **kw):
        OutputModule.__init__(self, actor_config)
        self.pool.createQueue("inbox")
        self.registerConsumer(self.consume, "inbox")
        self.couchdb = Database(couchdb_url)

    def consume(self, event):
        if event.isBulk():
            bulk_docs = {}
            for e in extractBulkItems(event):
                doc = e.get(self.kwargs.selection)
                doc_id = doc.pop('id', doc.pop('_id', ''))
                if doc_id:
                    doc['_id'] = doc['id'] = doc_id
                    bulk_docs[doc['id']] = doc
            # Fetch current revisions so the bulk update does not conflict.
            for row in self.couchdb.view('_all_docs', keys=list(bulk_docs.keys())).rows:
                if row.id in bulk_docs:
                    bulk_docs[row.id]['_rev'] = row['value']['rev']
            try:
                response = self.couchdb.update(list(bulk_docs.values()))
                for ok, doc_id, rest in response:
                    if ok:
                        self.logging.info("Saved {}".format(doc_id))
                    else:
                        self.logging.error(
                            "Error on save bulk. Type {}, message {}, doc {}".format(
                                rest, getattr(rest, 'message', ''), doc_id))
            except Exception as e:
                self.logging.error("Uncaught error {} on save bulk".format(e))
        else:
            data = event.get(self.kwargs.selection)
            doc_id = data.get('id', data.get('_id'))
            if doc_id:
                data['_id'] = data['id'] = doc_id
                if doc_id in self.couchdb:
                    rev = self.couchdb.get(doc_id).rev
                    data['_rev'] = rev
                    self.logging.debug("Update revision in data {} to {}".format(doc_id, rev))
            self.couchdb.save(data)
def deleteAllFoundationDocuments(db: couchdb.Database):
    thereAreRecords = True
    while thereAreRecords:
        query = db.find({'selector': {"source": {"$eq": "foundation"}},
                         "limit": 10000000})
        thereAreRecords = False
        for row in query:
            db.delete(row)
            thereAreRecords = True
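# Invocation sketch for deleteAllFoundationDocuments; the URL is illustrative,
# and Database.find() assumes a server with the Mango /_find endpoint
# (CouchDB 2.x+). find() returns full documents, including the _rev field
# that Database.delete() needs.
import couchdb

if __name__ == '__main__':
    deleteAllFoundationDocuments(couchdb.Database('http://localhost:5984/sentences'))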
def __init__(self, actor_config, couchdb_url, view, view_expression,
             conditions=[], selection="data"):
    FlowModule.__init__(self, actor_config)
    self.couchdb = Database(couchdb_url)
    self.pool.createQueue('inbox')
    self.registerConsumer(self.consume, 'inbox')
    self.prepare_expressions()
    self.view_expression = jq.jq(view_expression)
def production_change_stream(seq):
    """Given a sequence number in the npm registry change stream,
    start streaming from there!
    """
    return Database(REGISTRY_URL).changes(feed='continuous',
                                          include_docs=True, since=seq)
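# Consumption sketch for production_change_stream (REGISTRY_URL is assumed to
# be defined elsewhere in the module). With feed='continuous', couchdb-python
# yields one dict per change, carrying 'seq', 'id', and, because
# include_docs=True, the changed document under 'doc'.
if __name__ == '__main__':
    for change in production_change_stream(0):
        if 'id' in change:
            print(change['seq'], change['id'])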
def __init__(self, tender_id, worker_defaults={}, auction_data={}):
    self.tender_id = tender_id
    self.auction_doc_id = tender_id
    self._end_auction_event = Event()
    self.tender_url = urljoin(
        worker_defaults["resource_api_server"],
        '/api/{0}/auctions/{1}'.format(
            worker_defaults["resource_api_version"], tender_id))
    if auction_data:
        self.debug = True
        LOGGER.setLevel(logging.DEBUG)
        self._auction_data = auction_data
    else:
        self.debug = False
    self.bids_actions = BoundedSemaphore()
    self.session = RequestsSession()
    self.features = {}  # bw
    self.worker_defaults = worker_defaults
    if self.worker_defaults.get('with_document_service', False):
        self.session_ds = RequestsSession()
    self._bids_data = {}
    self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                       session=Session(retry_delays=range(10)))
    self.audit = {}
    self.retries = 10
    self.mapping = {}
    self._bids_data = defaultdict(list)
    self.has_critical_error = False
    if REQUEST_QUEUE_SIZE == -1:
        self.bids_queue = Queue()
    else:
        self.bids_queue = Queue(REQUEST_QUEUE_SIZE)
    self.bidders_data = []
def detail(request, id, username):
    # Connect to DB
    docs = Database("https://*****:*****@wazza.cloudant.com/" + username + "/")
    # Check if number exists in cache.
    # Second layer of exception handling, as this is already done by search.
    try:
        doc = docs[id]
    # This should never happen here, but just in case :)
    # If we are in the detail page, the entry exists.
    except ResourceNotFound:
        return HttpResponse(
            '<?xml version="1.0" Name="WS Project 2C"?> \n <error value = "Number not found"></error>'
        )
        # return render_to_response('number/notFound.html')
    # Check whether form or direct URI was used
    if request.method == "POST":
        # See if correct/incorrect were checked
        if request.POST['correct_incorrect'] == '1':
            doc['correct'] = doc['correct'] + 1
        elif request.POST['correct_incorrect'] == '0':
            doc['incorrect'] = doc['incorrect'] + 1
        # Update count
        docs[id] = doc
    else:
        result = '<?xml version="1.0" Name="WS Project 2C"?> \n'
        for x in docs:
            result = result + '<number value = "' + x + '"</number> \n'
        return HttpResponse(result)
    # Render page again
    result = '<?xml version="1.0" Name="WS Project 2C"?> \n'
    for x in docs:
        result = result + '<number value = "' + x + '"</number> \n'
    return HttpResponse(result)
def process_doc(doc, client):
    doc['count'] = 0
    doc['childCount'] = 0
    if "asn_identifier" in doc:
        if 'uri' in doc['asn_identifier']:
            doc['id'] = doc['asn_identifier']['uri'].strip()
        else:
            doc['id'] = doc['asn_identifier'].strip()
    if 'id' in doc:
        url = doc['id']
        doc['id'] = url[url.rfind("/") + 1:].lower()
    if "text" in doc:
        doc['title'] = doc['text']
    for key in keys_to_remove:
        if key.strip() in doc:
            del doc[key]
    if "id" in doc:
        items = client.zrevrange(doc['id'], 0, -1)
        count = 0
        local_db = Database("http://localhost:5984/lr-data")
        for doc_id in items:
            if doc_id in local_db:
                count += 1
        doc['count'] = count
    if "children" in doc:
        for child in doc['children']:
            doc['childCount'] += process_doc(child, client)
    return doc['count'] + doc['childCount']
class BotWorker(object):

    def __init__(self, config, client):
        self.config = config
        self.sleep = self.config['TIME_TO_SLEEP']
        self.lots_client = client(key=self.config['LOTS_API_TOKEN'],
                                  host_url=self.config["API_URL"],
                                  api_version=self.config["API_VERSION"])
        self.assets_client = client(resource="assets",
                                    key=self.config['ASSETS_API_TOKEN'],
                                    host_url=self.config["API_URL"],
                                    api_version=self.config["API_VERSION"])
        self.db = Database(
            "http://{login}:{password}@{host}:{port}/{db}".format(
                **self.config['LOTS_DB']),
            session=Session(retry_delays=range(10)))

    def get_lots(self, view):
        logger.info("Getting lots")
        try:
            return ({"data": {'id': lot.id,
                              'assets': lot.value['assets'],
                              'status': lot.value['status']}}
                    for lot in self.db.view(view)
                    if lot.value['status'] in ['waiting', 'dissolved'])
        except Exception, e:
            ecode = e.args[0]
            if ecode in RETRYABLE_ERRORS:
                logger.error("Error while getting lots: {}".format(e))
def __init__(self, config, activate=False):
    super(AuctionsDataBridge, self).__init__()
    self.config = config
    self.tenders_ids_list = []
    self.activate = activate
    self.client = ApiClient(
        '',
        host_url=self.config_get('tenders_api_server'),
        api_version=self.config_get('tenders_api_version'))
    params = {'opt_fields': 'status,auctionPeriod', 'mode': '_all_'}
    if parse_version(self.config_get('tenders_api_version')) > parse_version('0.9'):
        params['opt_fields'] += ',lots'
    self.client.params.update(params)
    self.tz = tzlocal()
    self.couch_url = urljoin(self.config_get('couch_url'),
                             self.config_get('auctions_db'))
    self.db = Database(self.couch_url,
                       session=Session(retry_delays=range(10)))
    if self.activate:
        self.queue = Queue()
        self.scheduler = GeventScheduler()
        self.scheduler.add_job(self.run_systemd_cmds, 'interval',
                               max_instances=1, minutes=2,
                               id='run_systemd_cmds')
        self.scheduler.start()
class CouchdbPoller(InputModule):

    def __init__(self, actor_config, couchdb_url, native_events=False,
                 seqfile="seqfile", destination="data", since=0, **kw):
        InputModule.__init__(self, actor_config)
        self.pool.createQueue("outbox")
        self.since = since
        self.seqfile = seqfile
        self.kw = kw
        try:
            self.couchdb = Database(couchdb_url)
        except HTTPError:
            self.logging.error("Invalid database name")
            # TODO: create db

    def _get_doc(self, doc_id):
        return loads(self.couchdb.resource.get(doc_id)[2].read())

    def preHook(self):
        if os.path.exists(self.seqfile):
            with open(self.seqfile) as seqfile:
                self.since = seqfile.read()
            self.logging.info('Restoring from seq: {}'.format(self.since))
        self.sendToBackground(self.produce)

    def postHook(self):
        with open(self.seqfile, 'w+') as seqfile:
            seqfile.write(str(self.since))

    def is_test_doc(self, doc):
        mode = doc.get('mode', False)
        if mode == "test":
            return True
        title = doc.get('title', False)
        if title and ("TESTING" in title.upper() or "ТЕСТУВАННЯ" in title.upper()):
            return True
        return False

    def produce(self):
        while self.loop():
            for feed in self.couchdb.changes(feed="continuous", since=self.since):
                self.since = feed.get('seq', feed.get('last_seq', "now"))
                self.logging.debug("Change event {}".format(feed))
                if 'id' in feed:
                    doc = self._get_doc(feed['id'])
                    if not self.is_test_doc(doc):
                        e = Event(doc)
                        self.submit(e, "outbox")
                sleep(0)
        self.logging.info("Stopping changes feed from couchdb")
def __init__(self, tender_id, worker_defaults={}, auction_data={},
             lot_id=None, activate=False):
    super(Auction, self).__init__()
    self.generate_request_id()
    self.tender_id = tender_id
    self.lot_id = lot_id
    if lot_id:
        self.auction_doc_id = tender_id + "_" + lot_id
    else:
        self.auction_doc_id = tender_id
    self.tender_url = urljoin(
        worker_defaults["TENDERS_API_URL"],
        '/api/{0}/tenders/{1}'.format(
            worker_defaults["TENDERS_API_VERSION"], tender_id
        )
    )
    self.activate = activate
    if auction_data:
        self.debug = True
        logger.setLevel(logging.DEBUG)
        self._auction_data = auction_data
    else:
        self.debug = False
    self._end_auction_event = Event()
    self.bids_actions = BoundedSemaphore()
    self.session = RequestsSession()
    self.worker_defaults = worker_defaults
    if self.worker_defaults.get('with_document_service', False):
        self.session_ds = RequestsSession()
    self._bids_data = {}
    self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                       session=Session(retry_delays=range(10)))
    self.audit = {}
    self.retries = 10
    self.bidders_count = 0
    self.bidders_data = []
    self.bidders_features = {}
    self.bidders_coeficient = {}
    self.features = None
    self.mapping = {}
    self.rounds_stages = []
def __init__(self, actor_config, couchdb_url, view_path,
             filter_key, filter_value, op):
    FlowModule.__init__(self, actor_config)
    self.view_path = view_path
    self.filter_key = jq.jq(filter_key)
    self.filter_value = jq.jq(filter_value)
    self.op = getattr(operator, op)
    self.pool.createQueue('outbox')
    self.pool.createQueue('inbox')
    self.couchdb = Database(couchdb_url)
    self.registerConsumer(self.consume, 'inbox')
def getMinimumSentenceThreshold(db: couchdb.Database,
                                sentences_length_view: str, threshold: int):
    availableLengths = [
        x.key for x in db.iterview(sentences_length_view, 2000000, group_level=1)
    ]
    availableLengths.sort(reverse=True)
    logging.info(f'the available lengths per word are {availableLengths}')
    lastElement = round(len(availableLengths) * (threshold / 100))
    return availableLengths[lastElement]
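# A worked example of the percentile arithmetic above, using made-up lengths
# (availableLengths is sorted descending, so a higher threshold walks further
# down the list and returns a smaller minimum length).
lengths = [40, 25, 10, 4, 1]
threshold = 60
last = round(len(lengths) * (threshold / 100))  # round(3.0) == 3
assert lengths[last] == 4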
def traverseTree(node, db: couchdb.Database):
    sizeOfBook = len(node.find_all('p'))
    paragraphs = 1
    for child in node.find_all('p'):
        paragraph = ""
        printProgress(paragraphs, sizeOfBook)
        for string in child.stripped_strings:
            paragraph = paragraph + " " + string
        for sentence in sentences.splitParagraph(paragraph):
            # TODO: add the sentence to CouchDB
            if sentence and sentence != "":
                now = datetime.datetime.now()
                doc = {'_id': str(uuid.uuid4()),
                       'type': 'sentence',
                       'sentence': sentence,
                       'source': 'foundation',
                       'date': now.isoformat()}
                db.save(doc)
        paragraphs = paragraphs + 1
    print("")
def detail(request, username, id):
    if request.method == "DELETE":
        session = checkSession(username)
        if session is False:
            docs = Database("https://*****:*****@wazza.cloudant.com/" + username + "/")
            deldocs = docs
            # If "delete all" is checked
            if id == 'deleteall':
                # Try to delete with the given credentials
                try:
                    for x in deldocs:
                        temp = deldocs[x]
                        docs.delete(temp)
                # Or report incorrect credentials
                except Unauthorized:
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <error value = "Could not authenticate"></error>')
            else:
                # Delete the individual doc that is checked
                try:
                    doc = docs[id]
                # Once again prompt if invalid credentials
                except:
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <error value = "No number/authentication"></error>')
                try:
                    # This is the actual deletion of the doc
                    docs.delete(doc)
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <success value = "Number Deleted"></success>')
                # This will never happen, as the document will be there if it is
                # listed for deletion. But just in case :)
                except ResourceNotFound:
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <error value = "Number not found"></error>')
        else:
            return HttpResponse('<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Session Expired"></error>')
    else:
        return HttpResponse('none')
    # return render_to_response('delete/delete.html', {'rows': docs, 'username': u})
    # Render success message
    return HttpResponse('nothing happened')
def index(request, username):
    # Connect to DB
    docs = Database("https://*****:*****@wazza.cloudant.com/" + username + "/")
    # Check if direct URI or form
    if request.method == "POST":
        id = request.POST['id'].replace(' ', '')
        return detail(request, id, username)
    elif request.method == "DELETE":
        return HttpResponse('is delete here')
    else:
        return HttpResponse("not post")
def cli_load_fixture():
    """Command line integration"""
    args = parser.parse_args()
    db = Database(url=args.database)
    fixture = json.load(args.fixture)
    # First delete any old design document. This avoids having to merge
    # attachments or store attachment revision history.
    try:
        del db[fixture["_id"]]
    except ResourceNotFound:
        pass
    put_fixture(db, fixture)
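# Hypothetical argparse setup assumed by cli_load_fixture above: the function
# references a module-level `parser` whose parsed args carry a `database` URL
# and a readable `fixture` file. A minimal sketch might look like this.
import argparse

parser = argparse.ArgumentParser(description="Load a JSON design-doc fixture into CouchDB")
parser.add_argument("--database", required=True, help="CouchDB database URL")
parser.add_argument("fixture", type=argparse.FileType("r"),
                    help="JSON fixture file containing an _id field")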
def __init__(self, config):
    super(EdgeDataBridge, self).__init__()
    self.config = config
    self.api_host = self.config_get('tenders_api_server')
    self.api_version = self.config_get('tenders_api_version')
    self.retrievers_params = self.config_get('retrievers_params')
    try:
        self.client = TendersClient(host_url=self.api_host,
                                    api_version=self.api_version, key='')
    except MissingSchema:
        raise DataBridgeConfigError(
            "'tenders_api_server' is empty or missing in config dictionary")
    except ConnectionError as e:
        raise e
    self.couch_url = urljoin(self.config_get('couch_url'),
                             self.config_get('public_db'))
    self.db = Database(self.couch_url,
                       session=Session(retry_delays=range(10)))
    try:
        self.db.info()
    except ResourceNotFound:
        error_message = "Database with name '" + self.config_get('public_db') + "' doesn't exist"
        raise DataBridgeConfigError(error_message)
    except error as e:  # socket.error
        if e.errno == errno.ECONNREFUSED:
            raise DataBridgeConfigError(
                "Connection refused: 'couch_url' is invalid in config dictionary")
    except AttributeError:
        raise DataBridgeConfigError(
            "'couch_url' is missing or empty in config dictionary")
    except KeyError as e:
        if e.message == 'db_name':
            raise DataBridgeConfigError(
                "'public_db' name is missing or empty in config dictionary")
class Auction(object):
    """docstring for Auction"""

    def __init__(self, auction_doc_id, worker_defaults={}, auction_data={}):
        super(Auction, self).__init__()
        self.auction_doc_id = auction_doc_id
        self.tender_url = urljoin(
            worker_defaults["TENDERS_API_URL"],
            '/api/{0}/tenders/{1}'.format(
                worker_defaults["TENDERS_API_VERSION"], auction_doc_id
            )
        )
        if auction_data:
            self.debug = True
            logger.setLevel(logging.DEBUG)
            self._auction_data = auction_data
        else:
            self.debug = False
        self._end_auction_event = Event()
        self.bids_actions = BoundedSemaphore()
        self.worker_defaults = worker_defaults
        self._bids_data = {}
        self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                           session=Session(retry_delays=range(10)))
        self.retries = 10

    def generate_request_id(self):
        self.request_id = generate_request_id()

    def get_auction_document(self):
        retries = self.retries
        while retries:
            try:
                self.auction_document = self.db.get(self.auction_doc_id)
                if self.auction_document:
                    logger.info(
                        "Get auction document {0[_id]} with rev {0[_rev]}".format(self.auction_document),
                        extra={"JOURNAL_REQUEST_ID": self.request_id,
                               "MESSAGE_ID": AUCTION_WORKER_DB})
                    return
            except HTTPError, e:
                logger.error("Error while getting document: {}".format(e),
                             extra={'MESSAGE_ID': AUCTION_WORKER_DB})
            except Exception, e:
                ecode = e.args[0]
                if ecode in RETRYABLE_ERRORS:
                    logger.error("Error while getting document: {}".format(e),
                                 extra={'MESSAGE_ID': AUCTION_WORKER_DB})
                else:
                    logger.critical("Unhandled error: {}".format(e),
                                    extra={'MESSAGE_ID': AUCTION_WORKER_DB})
            retries -= 1
def checkSession(username):
    session = Database('https://*****:*****@wazza.cloudant.com/session/')
    for user in session:
        if user == username:
            sessionDoc = session[user]
            sessionDate = sessionDoc['timestamp'].split(' ')
            currentDate = strftime("%Y-%m-%d")
            # Expired (True) unless the session was created today.
            return sessionDate[0] != currentDate
    # No session document for this user: treat the session as expired.
    return True
def lookup(number):
    docs = Database("https://*****:*****@wazza.cloudant.com/docs")
    look = []
    result = ""
    try:
        record = docs[number]
        result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <name value = "' + record.get('name') + '"></name> \n <number value = "' + number + '"></number>'
        return HttpResponse(result)
    except ResourceNotFound:
        api_key = '1ae485ea2b55d91d888138ae624063e4'
        base_url_whiteP = 'http://api.whitepages.com/reverse_phone/1.0/?phone='
        myopener = MyOpener()
        content_whiteP = myopener.open(base_url_whiteP + number + ';api_key=' + api_key).read()
        fName = re.search('wp:firstname>.*?<', content_whiteP)
        lName = re.search('wp:lastname>.*?<', content_whiteP)
        error = re.search('wp:errormessages', content_whiteP)
        if fName:
            look.append('Found')
            look.append(fName.group(0)[13:len(fName.group(0)) - 1])
            look.append(lName.group(0)[12:len(lName.group(0)) - 1])
        elif error:
            look.append('Error')
        else:
            look.append('NotFound')
        # XML outputs
        if look[0] == 'Found':
            result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <name value = "' + look[1] + ' ' + look[2] + '"></name> \n <number value = "' + number + '"></number>'
            return HttpResponse(result)
        elif look[0] == 'Error':
            result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Incorrect Number"></error>'
            return HttpResponse(result)
        else:
            # Not found
            result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Number not found"></error>'
            return HttpResponse(result)
def look(request, id, username):
    # Connect to DB
    docs = Database("https://*****:*****@wazza.cloudant.com/" + username + "/")
    session = checkSession(username)
    if session is False:
        # Perform lookup
        return search(docs, id, username)
    else:
        return HttpResponse(
            '<?xml version="1.0" Name="WS Project 2C"?> \n <error value = "Session Expired"></error>'
        )
def processEntries(db: couchdb.Database, sleepHourList: list):
    totalSentences = getTotalSentences(db, 'sentences/sentences_not_processed_count')
    sentenceCount = 1
    for entry in db.iterview('sentences/sentences_not_processed', 100):
        waitWhileSleepHour(sleepHourList)
        if sentenceCount % 1000 == 0:
            printProgress(sentenceCount, totalSentences)
        wordSet = {word for word in entry.value["word_list"] if word != ""}
        for word in wordSet:
            updateWordDocument(db, word, entry.value)
        setSentenceAsVisited(db, entry.id)
        sentenceCount = sentenceCount + 1
    if sentenceCount > 1:
        print("")  # to clear printProgress
from requests import get
from couchdb import Database

db = Database("http://localhost:5985/lr-data")
split_on = ", supported by"
page = 0
url = "http://12.109.40.31/search?terms=grade&page={0}"
data = get(url.format(page)).json()
while len(data) > 0:
    for item in data:
        if item['publisher'] is not None and split_on in item['publisher']:
            parts = [x.strip() for x in item['publisher'].split(split_on)]
            if parts[0] == parts[1]:
                print(parts)
                doc = db[item['_id']]
                item['publisher'] = parts[0]
                doc.update(item)
                print(db.save(doc))
    page += 1
    data = get(url.format(page)).json()
class CouchFSDocument(fuse.Fuse):

    def __init__(self, mountpoint, uri=None, *args, **kwargs):
        fuse.Fuse.__init__(self, *args, **kwargs)
        db_uri, doc_id = uri.rsplit('/', 1)
        self.doc_id = unquote(doc_id)
        self.db = Database(db_uri)

    def get_dirs(self):
        dirs = {}
        attachments = self.db[self.doc_id].get('_attachments', {}).keys()
        for att in attachments:
            parents = [u'']
            for name in att.split('/'):
                filenames = dirs.setdefault(u'/'.join(parents[1:]), set())
                if name != COUCHFS_DIRECTORY_PLACEHOLDER:
                    filenames.add(name)
                parents.append(name)
        return dirs

    def readdir(self, path, offset):
        path = _normalize_path(path)
        for r in '.', '..':
            yield fuse.Direntry(r)
        for name in self.get_dirs().get(path, []):
            yield fuse.Direntry(name.encode('utf-8'))

    def getattr(self, path):
        path = _normalize_path(path)
        try:
            st = CouchStat()
            if path == '' or path in self.get_dirs().keys():
                st.st_mode = stat.S_IFDIR | 0775
                st.st_nlink = 2
            else:
                att = self.db[self.doc_id].get('_attachments', {})
                data = att[path]
                st.st_mode = stat.S_IFREG | 0664
                st.st_nlink = 1
                st.st_size = data['length']
            return st
        except (KeyError, ResourceNotFound):
            return -errno.ENOENT

    def open(self, path, flags):
        path = _normalize_path(path)
        try:
            parts = path.rsplit(u'/', 1)
            if len(parts) == 1:
                dirname, filename = u'', parts[0]
            else:
                dirname, filename = parts
            if filename in self.get_dirs()[dirname]:
                return 0
            return -errno.ENOENT
        except (KeyError, ResourceNotFound):
            return -errno.ENOENT

    def read(self, path, size, offset):
        path = _normalize_path(path)
        try:
            data = self.db.get_attachment(self.db[self.doc_id], path)
            slen = len(data)
            if offset < slen:
                if offset + size > slen:
                    size = slen - offset
                buf = data[offset:offset + size]
            else:
                buf = ''
            return buf
        except (KeyError, ResourceNotFound):
            pass
        return -errno.ENOENT

    def write(self, path, buf, offset):
        path = _normalize_path(path)
        try:
            data = self.db.get_attachment(self.db[self.doc_id], path)
            data = data[0:offset] + buf + data[offset + len(buf):]
            self.db.put_attachment(self.db[self.doc_id], data, filename=path)
            return len(buf)
        except (KeyError, ResourceNotFound):
            pass
        return -errno.ENOENT

    def mknod(self, path, mode, dev):
        path = _normalize_path(path)
        self.db.put_attachment(self.db[self.doc_id], u'', filename=path)

    def unlink(self, path):
        path = _normalize_path(path)
        parts = path.rsplit(u'/', 1)
        if len(parts) == 1:
            dirname, filename = u'', parts[0]
        else:
            dirname, filename = parts
        self.db.delete_attachment(self.db[self.doc_id], path)
        if filename != COUCHFS_DIRECTORY_PLACEHOLDER and len(self.get_dirs().get(dirname, [])) == 0:
            print "putting to:", u'%s/%s' % (dirname, COUCHFS_DIRECTORY_PLACEHOLDER)
            self.db.put_attachment(self.db[self.doc_id], u'',
                                   filename=u'%s/%s' % (dirname, COUCHFS_DIRECTORY_PLACEHOLDER))

    def truncate(self, path, size):
        path = _normalize_path(path)
        self.db.put_attachment(self.db[self.doc_id], u'', filename=path)
        return 0

    def utime(self, path, times):
        return 0

    def mkdir(self, path, mode):
        path = _normalize_path(path)
        self.db.put_attachment(self.db[self.doc_id], u'',
                               filename=u'%s/%s' % (path, COUCHFS_DIRECTORY_PLACEHOLDER))
        return 0

    def rmdir(self, path):
        path = _normalize_path(path)
        self.db.delete_attachment(self.db[self.doc_id],
                                  u'%s/%s' % (path, COUCHFS_DIRECTORY_PLACEHOLDER))
        return 0

    def rename(self, pathfrom, pathto):
        pathfrom, pathto = _normalize_path(pathfrom), _normalize_path(pathto)
        data = self.db.get_attachment(self.db[self.doc_id], pathfrom)
        self.db.put_attachment(self.db[self.doc_id], data, filename=pathto)
        self.db.delete_attachment(self.db[self.doc_id], pathfrom)
        return 0

    def fsync(self, path, isfsyncfile):
        return 0

    def statfs(self):
        """
        Should return a tuple with the following 6 elements:
            - blocksize - size of file blocks, in bytes
            - totalblocks - total number of blocks in the filesystem
            - freeblocks - number of free blocks
            - availblocks - number of blocks available to non-superuser
            - totalfiles - total number of file inodes
            - freefiles - number of free file inodes

        Feel free to set any of the above values to 0, which tells
        the kernel that the info is not available.
        """
        st = fuse.StatVfs()
        block_size = 1024
        blocks = 1024 * 1024
        blocks_free = blocks
        blocks_avail = blocks_free
        files = 0
        files_free = 0
        st.f_bsize = block_size
        st.f_frsize = block_size
        st.f_blocks = blocks
        st.f_bfree = blocks_free
        st.f_bavail = blocks_avail
        st.f_files = files
        st.f_ffree = files_free
        return st
from json import load
from couchdb import Database

db = Database("http://localhost:5985/standards")

children = load(open("D10003FC.json"))
data = {"_id": "english", "children": children,
        "title": "english", "description": "english"}
db.save(data)

children = load(open("D100011F.json"))
data = {"_id": "math", "children": children,
        "title": "math", "description": "math"}
db.save(data)
#!/usr/bin/python
from setup_config import setup_config
from couchdb import Database
from pylons import config
from troppotardi.model import Email
from troppotardi.lib.utils import send_email

if __name__ == '__main__':
    setup_config()
    # Set up the database
    db = Database(config['couchdb_uri'])
    emails = Email.by_time(db)
    for email in emails:
        send_email(body=email.text,
                   subject=email.subject,
                   recipients=email.recipients,
                   sender=email.sender)
        db.delete(email)
#!/usr/bin/python
from setup_config import setup_config
from pylons import config
from couchdb import Database

if __name__ == '__main__':
    setup_config()
    db = Database(config['couchdb_uri'])
    db.compact()
    db.compact('galleries')
#!/usr/bin/python
from setup_config import setup_config
from pylons import config
from couchdb import Database

if __name__ == '__main__':
    setup_config()
    db = Database(config['couchdb_uri'])
    db.compact()
    db.compact('images')
    db.compact('users')
    db.compact('emails')
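# Note on the compact() calls above: with no argument, couchdb-python's
# Database.compact() triggers database compaction; given a design-document
# name (e.g. 'images'), it compacts that design document's view indexes
# instead. Hypothetical standalone equivalent (URL is illustrative):
from couchdb import Database

db = Database('http://localhost:5984/troppotardi')
db.compact()           # POST /db/_compact
db.compact('images')   # POST /db/_compact/images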
class AuctionsDataBridge(object):
    """AuctionsDataBridge"""

    def __init__(self, config):
        super(AuctionsDataBridge, self).__init__()
        self.config = config
        self.tenders_url = urljoin(
            self.config_get('tenders_api_server'),
            '/api/{}/tenders'.format(self.config_get('tenders_api_version'))
        )
        self.tz = tzlocal()
        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('auctions_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
        self.url = self.tenders_url

    def config_get(self, name):
        return self.config.get('main').get(name)

    def tender_url(self, tender_id):
        return urljoin(self.tenders_url, 'tenders/{}/auction'.format(tender_id))

    def get_tenders_list(self, re_planning=False):
        while True:
            params = {'offset': self.offset,
                      'opt_fields': 'status,auctionPeriod',
                      'mode': '_all_'}
            request_id = generate_request_id(prefix=b'data-bridge-req-')
            logger.debug('Start request to {}, params: {}'.format(self.url, params),
                         extra={"JOURNAL_REQUEST_ID": request_id})
            response = requests.get(self.url, params=params,
                                    headers={'content-type': 'application/json',
                                             'X-Client-Request-ID': request_id})
            logger.debug('Request response: {}'.format(response.status_code))
            if response.ok:
                response_json = response.json()
                if len(response_json['data']) == 0:
                    logger.info("Change offset date to {}".format(response_json['next_page']['offset']),
                                extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                    self.offset = response_json['next_page']['offset']
                    break
                for item in response_json['data']:
                    if 'auctionPeriod' in item \
                            and 'startDate' in item['auctionPeriod'] \
                            and 'endDate' not in item['auctionPeriod'] \
                            and item['status'] == "active.auction":
                        start_date = iso8601.parse_date(item['auctionPeriod']['startDate'])
                        start_date = start_date.astimezone(self.tz)
                        auctions_start_in_date = startDate_view(
                            self.db,
                            key=(mktime(start_date.timetuple()) + start_date.microsecond / 1E6) * 1000
                        )
                        if datetime.now(self.tz) > start_date:
                            logger.info("Tender {} start date in past. Skip it for planning".format(item['id']),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            continue
                        if re_planning and item['id'] in self.tenders_ids_list:
                            logger.info("Tender {} already planned while replanning".format(item['id']),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            continue
                        elif not re_planning and [row.id for row in auctions_start_in_date.rows
                                                  if row.id == item['id']]:
                            logger.info("Tender {} already planned on same date".format(item['id']),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            continue
                        yield item
                    if item['status'] == "cancelled":
                        future_auctions = endDate_view(self.db, startkey=time() * 1000)
                        if item["id"] in [i.id for i in future_auctions]:
                            logger.info("Tender {} canceled".format(item["id"]),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            auction_document = self.db[item["id"]]
                            auction_document["current_stage"] = -100
                            auction_document["endDate"] = datetime.now(self.tz).isoformat()
                            self.db.save(auction_document)
                            logger.info("Change auction {} status to 'canceled'".format(item["id"]),
                                        extra={"JOURNAL_REQUEST_ID": request_id,
                                               'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                logger.info("Change offset date to {}".format(response_json['next_page']['offset']),
                            extra={"JOURNAL_REQUEST_ID": request_id,
                                   'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                self.offset = response_json['next_page']['offset']
            else:
                sleep(10)

    def start_auction_worker(self, tender_item):
        result = do_until_success(
            check_output,
            args=([self.config_get('auction_worker'), 'planning',
                   str(tender_item['id']),
                   self.config_get('auction_worker_config')],),
        )
        logger.info("Auction planning command result: {}".format(result),
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_PROCESS})

    def planning_with_couch(self):
        logger.info('Start Auctions Bridge with feed to couchdb',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        logger.info('Start data sync...', extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        self.planned_tenders = {}
        self.last_seq_id = 0
        while True:
            do_until_success(self.handle_continuous_feed)

    def handle_continuous_feed(self):
        change = self.db.changes(feed='continuous',
                                 filter="auctions/by_startDate",
                                 since=self.last_seq_id,
                                 include_docs=True)
        for tender_item in change:
            if 'id' in tender_item:
                start_date = tender_item['doc']['stages'][0]['start']
                if tender_item['doc'].get("current_stage", "") == -100:
                    continue
                if tender_item['doc'].get("mode", "") == "test":
                    logger.info('Skipped test auction {}'.format(tender_item['id']),
                                extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                    continue
                if tender_item['id'] in self.planned_tenders and \
                        self.planned_tenders[tender_item['id']] == start_date:
                    logger.debug('Tender {} filtered'.format(tender_item['id']))
                    continue
                logger.info('Tender {} selected for planning'.format(tender_item['id']),
                            extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                self.start_auction_worker(tender_item)
                self.planned_tenders[tender_item['id']] = start_date
            elif 'last_seq' in tender_item:
                self.last_seq_id = tender_item['last_seq']
        logger.info('Resume data sync...', extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})

    def run(self):
        logger.info('Start Auctions Bridge',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        self.offset = ''
        logger.info('Start data sync...', extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        while True:
            for tender_item in self.get_tenders_list():
                logger.debug('Tender {} selected for planning'.format(tender_item))
                self.start_auction_worker(tender_item)
                sleep(2)
            logger.info('Sleep...', extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
            sleep(100)
            logger.info('Resume data sync...', extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})

    def run_re_planning(self):
        self.re_planning = True
        self.tenders_ids_list = []
        self.offset = ''
        logger.info('Start Auctions Bridge for re-planning...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_RE_PLANNING})
        for tender_item in self.get_tenders_list(re_planning=True):
            logger.debug('Tender {} selected for re-planning'.format(tender_item))
            self.start_auction_worker(tender_item)
            self.tenders_ids_list.append(tender_item['id'])
            sleep(1)
        logger.info("Re-planning auctions finished",
                    extra={'MESSAGE_ID': DATA_BRIDGE_RE_PLANNING})