Example #1
def insertUrlList( db : couchdb.Database, urlList ):
    logging.info( "inserting url list..." )
    for url in urlList: 
        data = { '_id' : url,
                 'type' : 'url', 
                 'visited' : False }
        db.save( data )
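A minimal usage sketch for the helper above (not part of the original example): the server URL, database name, and URL list are assumptions.

import logging
import couchdb

logging.basicConfig(level=logging.INFO)

couch = couchdb.Server('http://localhost:5984/')  # assumed local CouchDB server
db = couch['crawler'] if 'crawler' in couch else couch.create('crawler')  # assumed db name
insertUrlList(db, ['http://example.com/', 'http://example.org/'])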
Example #2
class EdgeDataBridge(object):

    """Edge Bridge"""

    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')
        self.retrievers_params = self.config_get('retrievers_params')

        self.client = TendersClient(host_url=self.api_host,
            api_version=self.api_version, key=''
        )

        self.couch_url = urljoin(
            self.config_get('couch_url'),
            self.config_get('public_db')
        )
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))

    def config_get(self, name):
        return self.config.get('main').get(name)

    def get_teders_list(self):
        for item in get_tenders(host=self.api_host, version=self.api_version,
                                key='', extra_params={'mode': '_all_'},
                                retrievers_params=self.retrievers_params):
            yield (item["id"], item["dateModified"])

    def save_tender_in_db(self, tender_id, date_modified):
        tender_doc = self.db.get(tender_id)
        if tender_doc:
            if tender_doc['dateModified'] == date_modified:
                return
        tender = self.client.get_tender(tender_id).get('data')
        if tender:
            tender['_id'] = tender_id
            tender['doc_type'] = 'Tender'
            if tender_doc:
                tender['_rev'] = tender_doc['_rev']
                logger.info('Update tender {} '.format(tender_id))
            else:
                logger.info('Save tender {} '.format(tender_id))
            try:
                self.db.save(tender)
            except Exception as e:
                logger.info('Saving tender {} failed with error {}'.format(tender_id, e.message),
                    extra={'MESSAGE_ID': 'edge_bridge_fail_save_in_db'})
        else:
            logger.info('Tender {} not found'.format(tender_id))

    def run(self):
        logger.info('Start Edge Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': 'edge_bridge__data_sync'})
        for tender_id, date_modified in self.get_teders_list():
            self.save_tender_in_db(tender_id, date_modified)
Example #3
class CouchdbPuller(OutputModule):
    def __init__(self,
                 actor_config,
                 couchdb_url,
                 payload=None,
                 selection="data",
                 bulk=100,
                 parallel_streams=1,
                 native_events=False,
                 **kw):
        OutputModule.__init__(self, actor_config)
        self.pool.createQueue("inbox")
        self.registerConsumer(self.consume, "inbox")
        self.couchdb = Database(couchdb_url)
        self._bulk_size = bulk
        self._bulk = {}

    def __save(self):
        self.logging.debug("Saving: {} docs".format(len(self._bulk)))
        try:
            response = self.couchdb.update(
                [doc for doc in self._bulk.values()])
            for ok, doc_id, rest in response:
                if ok:
                    self.logging.info("Saved {}".format(doc_id))
                else:
                    self.logging.error(
                        "Error on save bulk. Type {}, message {}, doc {}".
                        format(rest, getattr(rest, 'message', ''), doc_id))
        except Exception as e:
            self.logging.error("Uncaught error {} on save bulk".format(e, ))
        finally:
            self._bulk = {}
            self.logging.debug("Cleaned bulk")

        return False

    def consume(self, event):
        data = self.encode(self.getDataToSubmit(event))
        if not isinstance(data, dict):
            try:
                data = loads(data)
            except ValueError:
                self.logging.error(
                    "Unable to parse data from raw string. Skipping")
                return  # skip events that cannot be parsed into a dict
        id = data.get('id', data.get('_id'))
        if id:
            data['_id'] = data['id'] = id
        if id and (id in self.couchdb):
            rev = self.couchdb.get(id).rev
            data['_rev'] = rev
            self.logging.debug("Update revision in data {} to {}".format(
                id, rev))
        self._bulk[data.get('_id', uuid4().hex)] = data
        self.logging.debug("Added {} to bulk queue. Size {}".format(
            id, len(self._bulk)))
        if len(self._bulk) >= self._bulk_size:
            g = spawn(self.__save)
            g.join()
Example #4
class EdgeDataBridge(object):
    """Edge Bridge"""
    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')

        self.client = TendersClient(host_url=self.api_host,
                                    api_version=self.api_version,
                                    key='')

        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('public_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))

    def config_get(self, name):
        return self.config.get('main').get(name)

    def get_teders_list(self):
        for item in get_tenders(host=self.api_host,
                                version=self.api_version,
                                key='',
                                extra_params={'mode': '_all_'}):
            yield (item["id"], item["dateModified"])

    def save_tender_in_db(self, tender_id, date_modified):
        tender_doc = self.db.get(tender_id)
        if tender_doc:
            if tender_doc['dateModified'] == date_modified:
                return
        tender = self.client.get_tender(tender_id).get('data')
        if tender:
            tender['_id'] = tender_id
            tender['doc_type'] = 'Tender'
            if tender_doc:
                tender['_rev'] = tender_doc['_rev']
                logger.info('Update tender {} '.format(tender_id))
            else:
                logger.info('Save tender {} '.format(tender_id))
            try:
                self.db.save(tender)
            except Exception as e:
                logger.info(
                    'Saving tender {} failed with error {}'.format(
                        tender_id, e.message),
                    extra={'MESSAGE_ID': 'edge_bridge_fail_save_in_db'})
        else:
            logger.info('Tender {} not found'.format(tender_id))

    def run(self):
        logger.info('Start Edge Bridge',
                    extra={'MESSAGE_ID': 'edge_bridge_start_bridge'})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': 'edge_bridge__data_sync'})
        for tender_id, date_modified in self.get_teders_list():
            self.save_tender_in_db(tender_id, date_modified)
Example #5
def authenticate(request,id):
    result = ""
    docs = Database("https://*****:*****@wazza.cloudant.com/api_keys")
    sep = "_"
    list = string.split(id, sep)
    authenticate = docs.get(str(list[1]))
    if authenticate == None:
        result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Invalid API Key"></error>'
        return HttpResponse(result)
    else:
        return HttpResponse(lookup(str(list[0])))
Example #6
 def __init__(self,
              actor_config,
              couchdb_url,
              payload=None,
              selection="data",
              parallel_streams=1,
              native_events=False,
              **kw):
     OutputModule.__init__(self, actor_config)
     self.pool.createQueue("inbox")
     self.registerConsumer(self.consume, "inbox")
     self.couchdb = Database(couchdb_url)
Example #7
def processEntries( db : couchdb.Database ):
    totalSentences =  [x for x in db.iterview( 'sentences/sentences_count', 10 )][0].value
    sentenceCount = 1
    for entry in db.iterview( 'sentences/sentences', 100 ) :
        if sentenceCount % 1000 == 0 :
            printProgress( sentenceCount, totalSentences )
        for word in sentences.splitInWords( entry.value['sentence'] ) : 
            if word and word != "" :
                updateWordDocument( db, 
                                    word, 
                                    entry.value )
        sentenceCount = sentenceCount + 1
    print("") # to clear printProgress   
Example #8
def authenticate(request, id):
    result = ""
    docs = Database(
        "https://*****:*****@wazza.cloudant.com/api_keys"
    )
    sep = "_"
    list = string.split(id, sep)
    authenticate = docs.get(str(list[1]))
    if authenticate == None:
        result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Invalid API Key"></error>'
        return HttpResponse(result)
    else:
        return HttpResponse(lookup(str(list[0])))
Example #9
class CouchdbOutput(OutputModule):
    def __init__(self,
                 actor_config,
                 couchdb_url,
                 payload=None,
                 selection="data",
                 parallel_streams=1,
                 native_events=False,
                 **kw):
        OutputModule.__init__(self, actor_config)
        self.pool.createQueue("inbox")
        self.registerConsumer(self.consume, "inbox")
        self.couchdb = Database(couchdb_url)

    def consume(self, event):
        if event.isBulk():
            bulk_docs = {}
            for e in extractBulkItems(event):
                doc = e.get(self.kwargs.selection)
                doc_id = doc.pop('id', doc.pop('_id', ''))
                if doc_id:
                    doc['_id'] = doc['id'] = doc_id
                bulk_docs[doc['id']] = doc

            for row in self.couchdb.view('_all_docs',
                                         keys=list(bulk_docs.keys())).rows:
                if row.id in bulk_docs:
                    bulk_docs[row.id]['_rev'] = row['value']['rev']
            try:
                response = self.couchdb.update(list(bulk_docs.values()))
                for ok, doc_id, rest in response:
                    if ok:
                        self.logging.info("Saved {}".format(doc_id))
                    else:
                        self.logging.error(
                            "Error on save bulk. Type {}, message {}, doc {}".
                            format(rest, getattr(rest, 'message', ''), doc_id))
            except Exception as e:
                self.logging.error("Uncaught error {} on save bulk".format(
                    e, ))
        else:
            data = event.get(self.kwargs.selection)
            doc_id = data.get('id', data.get('_id'))
            if doc_id:
                data['_id'] = data['id'] = doc_id
                if doc_id in self.couchdb:
                    rev = self.couchdb.get(doc_id).rev
                    data['_rev'] = rev
                    self.logging.debug(
                        "Update revision in data {} to {}".format(doc_id, rev))
            self.couchdb.save(data)
Example #10
 def __init__(self, config, client):
     self.config = config
     self.sleep = self.config['TIME_TO_SLEEP']
     self.lots_client = client(key=self.config['LOTS_API_TOKEN'],
                               host_url=self.config["API_URL"],
                               api_version=self.config["API_VERSION"])
     self.assets_client = client(resource="assets",
                                 key=self.config['ASSETS_API_TOKEN'],
                                 host_url=self.config["API_URL"],
                                 api_version=self.config["API_VERSION"])
     self.db = Database(
         "http://{login}:{password}@{host}:{port}/{db}".format(
             **self.config['LOTS_DB']),
         session=Session(retry_delays=range(10)))
Example #11
def deleteAllFoundationDocuments( db : couchdb.Database ):
    thereAreRecords = True
    while thereAreRecords :
        query = db.find( {'selector' : {
                            "source" : {
                                "$eq" : "foundation"
                                } 
                            },
                            "limit" : 10000000
                        } )
        thereAreRecords = False
        for row in query : 
            db.delete( row )
            thereAreRecords = True
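A short sketch of how the cleanup function above could be invoked; the server URL and database name are assumptions, and db.find() requires a CouchDB release that supports Mango queries.

import couchdb

db = couchdb.Database('http://localhost:5984/sentences')  # assumed URL and db name
deleteAllFoundationDocuments(db)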
Example #12
    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')

        self.client = TendersClient(host_url=self.api_host,
                                    api_version=self.api_version,
                                    key='')

        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('public_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
Example #13
 def __init__(
     self,
     actor_config,
     couchdb_url,
     view,
     view_expression,
     conditions=[],
     selection="data"
 ):
     FlowModule.__init__(self, actor_config)
     self.couchdb = Database(couchdb_url)
     self.pool.createQueue('inbox')
     self.registerConsumer(self.consume, 'inbox')
     self.prepare_expressions()
     self.view_expression = jq.jq(view_expression)
Example #14
def production_change_stream(seq):
    """Given a sequence number in the npm registry change stream, start
    streaming from there!
    """
    return Database(REGISTRY_URL).changes(feed='continuous',
                                          include_docs=True,
                                          since=seq)
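A hedged consumer sketch for the generator above: the starting sequence number and the fields read from each change are illustrative, and REGISTRY_URL is assumed to point at a CouchDB-compatible registry endpoint.

# Iterate over the continuous changes feed; each item is a change dict.
for change in production_change_stream(0):
    doc = change.get('doc')  # present because include_docs=True
    if doc:
        print(doc.get('name'), change.get('seq'))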
Example #15
    def __init__(self, tender_id, worker_defaults={}, auction_data={}):
        self.tender_id = tender_id
        self.auction_doc_id = tender_id
        self._end_auction_event = Event()
        self.tender_url = urljoin(
            worker_defaults["resource_api_server"],
            '/api/{0}/auctions/{1}'.format(
                worker_defaults["resource_api_version"], tender_id))
        if auction_data:
            self.debug = True
            LOGGER.setLevel(logging.DEBUG)
            self._auction_data = auction_data
        else:
            self.debug = False
        self.bids_actions = BoundedSemaphore()
        self.session = RequestsSession()
        self.features = {}  # bw
        self.worker_defaults = worker_defaults
        if self.worker_defaults.get('with_document_service', False):
            self.session_ds = RequestsSession()
        self._bids_data = {}
        self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                           session=Session(retry_delays=range(10)))
        self.audit = {}
        self.retries = 10
        self.mapping = {}
        self._bids_data = defaultdict(list)
        self.has_critical_error = False
        if REQUEST_QUEUE_SIZE == -1:
            self.bids_queue = Queue()
        else:
            self.bids_queue = Queue(REQUEST_QUEUE_SIZE)

        self.bidders_data = []
Example #16
def detail(request, id, username):
    #Connect to DB
    docs = Database("https://*****:*****@wazza.cloudant.com/" + username +
                    "/")
    #Check if number exists in cache
    #Second layer of exception handling, as this is already done by search
    try:
        doc = docs[id]
    #This should never happen here. But just in case :)
    #If we are on the detail page, the entry should already exist
    except ResourceNotFound:
        return HttpResponse(
            '<?xml version="1.0" Name="WS Project 2C"?> \n <error value = "Number not found"></error>'
        )
        #return render_to_response('number/notFound.html')
    #Check whether form or direct URI was used
    if request.method == "POST":
        #See if correct/incorrect were checked
        if request.POST['correct_incorrect'] == '1':
            doc['correct'] = doc['correct'] + 1
        elif request.POST['correct_incorrect'] == '0':
            doc['incorrect'] = doc['incorrect'] + 1
        #Update count
        docs[id] = doc
    else:
        result = '<?xml version="1.0" Name="WS Project 2C"?> \n'
        for x in docs:
            result = result + '<number value = "' + x + '"</number> \n'
        return HttpResponse(result)
    #Render page again
    result = '<?xml version="1.0" Name="WS Project 2C"?> \n'
    for x in docs:
        result = result + '<number value = "' + x + '"</number> \n'
    return HttpResponse(result)
Example #17
def process_doc(doc, client):
    doc['count'] = 0
    doc['childCount'] = 0
    if "asn_identifier" in doc:
        if 'uri' in doc['asn_identifier']:
            doc['id'] = doc['asn_identifier']['uri'].strip()
        else:
            doc['id'] = doc['asn_identifier'].strip()
    if 'id' in doc:
        url = doc['id']
        doc['id'] = url[url.rfind("/") + 1:].lower()
    if "text" in doc:
        doc['title'] = doc['text']
    for key in keys_to_remove:
        if key.strip() in doc:
            del doc[key]
    if "id" in doc:
        items = client.zrevrange(doc['id'], 0, -1)
        count = 0
        local_db = Database("http://localhost:5984/lr-data")
        for doc_id in items:
            if doc_id in local_db:
                count += 1
        doc['count'] = count
    if "children" in doc:
        for child in doc['children']:
            doc['childCount'] += process_doc(child, client)
    return doc['count'] + doc['childCount']
Example #18
class BotWorker(object):
    def __init__(self, config, client):
        self.config = config
        self.sleep = self.config['TIME_TO_SLEEP']
        self.lots_client = client(key=self.config['LOTS_API_TOKEN'],
                                  host_url=self.config["API_URL"],
                                  api_version=self.config["API_VERSION"])
        self.assets_client = client(resource="assets",
                                    key=self.config['ASSETS_API_TOKEN'],
                                    host_url=self.config["API_URL"],
                                    api_version=self.config["API_VERSION"])
        self.db = Database(
            "http://{login}:{password}@{host}:{port}/{db}".format(
                **self.config['LOTS_DB']),
            session=Session(retry_delays=range(10)))

    def get_lots(self, view):
        logger.info("Getting lots")
        try:
            return ({
                "data": {
                    'id': lot.id,
                    'assets': lot.value['assets'],
                    'status': lot.value['status']
                }
            } for lot in self.db.view(view)
                    if lot.value['status'] in ['waiting', 'dissolved'])
        except Exception, e:
            ecode = e.args[0]
            if ecode in RETRYABLE_ERRORS:
                logger.error("Error while getting lots: {}".format(e))
Example #19
    def __init__(self, config, activate=False):
        super(AuctionsDataBridge, self).__init__()
        self.config = config
        self.tenders_ids_list = []
        self.activate = activate
        self.client = ApiClient(
            '',
            host_url=self.config_get('tenders_api_server'),
            api_version=self.config_get('tenders_api_version'))
        params = {'opt_fields': 'status,auctionPeriod', 'mode': '_all_'}
        if parse_version(
                self.config_get('tenders_api_version')) > parse_version('0.9'):
            params['opt_fields'] += ',lots'
        self.client.params.update(params)
        self.tz = tzlocal()

        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('auctions_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))

        if self.activate:
            self.queue = Queue()
            self.scheduler = GeventScheduler()
            self.scheduler.add_job(self.run_systemd_cmds,
                                   'interval',
                                   max_instances=1,
                                   minutes=2,
                                   id='run_systemd_cmds')
            self.scheduler.start()
Example #20
 def __init__(self,
              actor_config,
              couchdb_url,
              native_events=False,
              seqfile="seqfile",
              destination="data",
              since=0,
              **kw):
     InputModule.__init__(self, actor_config)
     self.pool.createQueue("outbox")
     self.since = since
     self.seqfile = seqfile
     self.kw = kw
     try:
         self.couchdb = Database(couchdb_url)
     except HTTPError:
         self.logging.error("Invalid database name")
Example #21
class CouchdbPoller(InputModule):
    def __init__(self,
                 actor_config,
                 couchdb_url,
                 native_events=False,
                 seqfile="seqfile",
                 destination="data",
                 since=0,
                 **kw):
        InputModule.__init__(self, actor_config)
        self.pool.createQueue("outbox")
        self.since = since
        self.seqfile = seqfile
        self.kw = kw
        try:
            self.couchdb = Database(couchdb_url)
        except HTTPError:
            self.logging.error("Invalid database name")
            # TODO: create db

    def _get_doc(self, doc_id):
        return loads(self.couchdb.resource.get(doc_id)[2].read())

    def preHook(self):
        if os.path.exists(self.seqfile):
            with open(self.seqfile) as seqfile:
                self.since = seqfile.read()
                self.logging.info('Restoring from seq: {}'.format(self.since))
        self.sendToBackground(self.produce)

    def postHook(self):
        with open(self.seqfile, 'w+') as seqfile:
            seqfile.write(str(self.since))

    def is_test_doc(self, doc):
        mode = doc.get('mode', False)
        if mode == "test":
            return True
        title = doc.get('title', False)
        if title and ("TESTING" in title.upper()
                      or "ТЕСТУВАННЯ" in title.upper()):
            return True
        return False

    def produce(self):
        while self.loop():
            for feed in self.couchdb.changes(feed="continuous",
                                             since=self.since):
                self.since = feed.get('seq', feed.get('last_seq', "now"))
                self.logging.debug("Change event {}".format(feed))
                if 'id' in feed:
                    doc = self._get_doc(feed['id'])
                    if not self.is_test_doc(doc):
                        e = Event(doc)
                        self.submit(e, "outbox")
                sleep(0)
        self.logging.info("Stopping changes feed from couchdb")
Example #22
 def __init__(self, tender_id,
              worker_defaults={},
              auction_data={},
              lot_id=None,
              activate=False):
     super(Auction, self).__init__()
     self.generate_request_id()
     self.tender_id = tender_id
     self.lot_id = lot_id
     if lot_id:
         self.auction_doc_id = tender_id + "_" + lot_id
     else:
         self.auction_doc_id = tender_id
     self.tender_url = urljoin(
         worker_defaults["TENDERS_API_URL"],
         '/api/{0}/tenders/{1}'.format(
             worker_defaults["TENDERS_API_VERSION"], tender_id
         )
     )
     self.activate = activate
     if auction_data:
         self.debug = True
         logger.setLevel(logging.DEBUG)
         self._auction_data = auction_data
     else:
         self.debug = False
     self._end_auction_event = Event()
     self.bids_actions = BoundedSemaphore()
     self.session = RequestsSession()
     self.worker_defaults = worker_defaults
     if self.worker_defaults.get('with_document_service', False):
         self.session_ds = RequestsSession()
     self._bids_data = {}
     self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                        session=Session(retry_delays=range(10)))
     self.audit = {}
     self.retries = 10
     self.bidders_count = 0
     self.bidders_data = []
     self.bidders_features = {}
     self.bidders_coeficient = {}
     self.features = None
     self.mapping = {}
     self.rounds_stages = []
Example #23
 def __init__(
     self,
     actor_config,
     couchdb_url,
     view_path,
     filter_key,
     filter_value,
     op,
 ):
     FlowModule.__init__(self, actor_config)
     self.pool.createQueue("outbox")
     self.view_path = view_path
     self.filter_key = jq.jq(filter_key)
     self.filter_value = jq.jq(filter_value)
     self.op = getattr(operator, op)
     self.pool.createQueue('outbox')
     self.pool.createQueue('inbox')
     self.couchdb = Database(couchdb_url)
     self.registerConsumer(self.consume, 'inbox')
Example #24
def getMinimumSentenceThreshold(db: couchdb.Database,
                                sentences_length_view: str, threshold: int):
    availableLengths = [
        x.key
        for x in db.iterview(sentences_length_view, 2000000, group_level=1)
    ]
    availableLengths.sort(reverse=True)
    logging.info(f'the available lengths per word are {availableLengths}')
    lastElement = round(len(availableLengths) * (threshold / 100))
    return availableLengths[lastElement]
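An illustrative call of the threshold helper above; the database URL, view name, and percentage are assumptions.

import couchdb

db = couchdb.Database('http://localhost:5984/words')  # assumed URL and db name
minimum = getMinimumSentenceThreshold(db, 'words/words_by_sentence_length', 20)
print('minimum sentence threshold:', minimum)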
Example #25
def traverseTree( node, db : couchdb.Database ):
    sizeOfBook = len(node.find_all('p'))
    paragraphs = 1
    for child in node.find_all('p') : 
        paragraph = ""
        printProgress( paragraphs, sizeOfBook )
        for string in child.stripped_strings: 
            paragraph = paragraph + " " + string
        for sentence in sentences.splitParagraph( paragraph ):
            # todo: add to couch db the stuff.....
            if sentence and sentence != "" : 
                now = datetime.datetime.now()
                doc = { '_id' : str(uuid.uuid4()), 
                        'type' : 'sentence', 
                        'sentence' : sentence, 
                        'source' : 'foundation', 
                        'date' : now.isoformat() }
                db.save( doc )
        paragraphs = paragraphs + 1
    print("")
Example #26
def detail(request,username,id):
    if request.method == "DELETE":
        session = checkSession(username)
        if session == False:
            docs = Database("https://*****:*****@wazza.cloudant.com/"+username+"/")
            deldocs = docs
            #If delete all is checked
            if id=='deleteall':
                #Try to delete with the given credentials
                try:
                    for x in deldocs:
                        temp = deldocs[x]
                        docs.delete(temp)
                #Or report incorrect credentials
                except Unauthorized:
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <error value = "Could not authenticate"></error>')
            else:
                #Delete the individual doc that is checked
                try:    
                    doc = docs[id]
                #Once again prompt if invalid credentials
                except:
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <error value = "No number/authentication"></error>')
                
                try:
                    #This is the actual deletion of the doc
                    docs.delete(doc)
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <success value = "Number Deleted"></success>')
                #This will never happen as the document will be there if it is listed for deletion
                #But just in case :)
                except ResourceNotFound:
                    return HttpResponse('<?xml version="1.0" Name="WS Project 2D"?> \n <error value = "Number not found"></error>')    
        else:
            return HttpResponse('<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Session Expired"></error>')
    else:
        return HttpResponse('none')
        #return render_to_response('delete/delete.html',{'rows':docs,'username':u})
    
           
    #Render success message
    return HttpResponse('nothing happened')
Example #27
def index(request, username):
    #Connect to DB
    docsc = Database("https://*****:*****@wazza.cloudant.com/" + username +
                     "/")
    #Check if direct URI or form
    if request.method == "POST":
        id = request.POST['id'].replace(' ', '')
        return detail(request, id, username)
    elif request.method == "DELETE":
        return HttpResponse('is delete here')
    else:
        return HttpResponse("not post")
Example #28
def cli_load_fixture():
    """Command line integration"""
    args = parser.parse_args()
    db = Database(url=args.database)
    fixture = json.load(args.fixture)
    # First delete any old design document. This avoids having to merge
    # attachments or store attachment revision history.
    try:
        del db[fixture["_id"]]
    except ResourceNotFound:
        pass
    put_fixture(db, fixture)
Example #29
    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')
        self.retrievers_params = self.config_get('retrievers_params')

        try:
            self.client = TendersClient(host_url=self.api_host,
                                        api_version=self.api_version,
                                        key='')
        except MissingSchema:
            raise DataBridgeConfigError(
                'Empty or missing \'tenders_api_server\' in config dictionary')
        except ConnectionError as e:
            raise e

        self.couch_url = urljoin(self.config_get('couch_url'),
                                 self.config_get('public_db'))
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
        try:
            self.db.info()
        except ResourceNotFound:
            error_message = "Database with name '" + self.config_get(
                'public_db') + "' doesn\'t exist"
            raise DataBridgeConfigError(error_message)
        except error as e:
            if e.errno == errno.ECONNREFUSED:
                raise DataBridgeConfigError(
                    "Connection refused: 'couch_url' is invalid in config dictionary"
                )
        except AttributeError as e:
            raise DataBridgeConfigError(
                '\'couch_url\' is missing or empty in config dictionary.')
        except KeyError as e:
            if e.message == 'db_name':
                raise DataBridgeConfigError(
                    '\'public_db\' name is missing or empty in config dictionary'
                )
Example #30
class Auction(object):
    """docstring for Auction"""
    def __init__(self, auction_doc_id,
                 worker_defaults={},
                 auction_data={}):
        super(Auction, self).__init__()
        self.auction_doc_id = auction_doc_id
        self.tender_url = urljoin(
            worker_defaults["TENDERS_API_URL"],
            '/api/{0}/tenders/{1}'.format(
                worker_defaults["TENDERS_API_VERSION"], auction_doc_id
            )
        )
        if auction_data:
            self.debug = True
            logger.setLevel(logging.DEBUG)
            self._auction_data = auction_data
        else:
            self.debug = False
        self._end_auction_event = Event()
        self.bids_actions = BoundedSemaphore()
        self.worker_defaults = worker_defaults
        self._bids_data = {}
        self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                           session=Session(retry_delays=range(10)))
        self.retries = 10

    def generate_request_id(self):
        self.request_id = generate_request_id()

    def get_auction_document(self):
        retries = self.retries
        while retries:
            try:
                self.auction_document = self.db.get(self.auction_doc_id)
                if self.auction_document:
                    logger.info("Get auction document {0[_id]} with rev {0[_rev]}".format(self.auction_document),
                                extra={"JOURNAL_REQUEST_ID": self.request_id,
                                       "MESSAGE_ID": AUCTION_WORKER_DB})
                return
            except HTTPError, e:
                logger.error("Error while get document: {}".format(e),
                             extra={'MESSAGE_ID': AUCTION_WORKER_DB})
            except Exception, e:
                ecode = e.args[0]
                if ecode in RETRYABLE_ERRORS:
                    logger.error("Error while save document: {}".format(e),
                                 extra={'MESSAGE_ID': AUCTION_WORKER_DB})
                else:
                    logger.critical("Unhandled error: {}".format(e),
                                    extra={'MESSAGE_ID': AUCTION_WORKER_DB})
            retries -= 1
Example #31
def checkSession(username):
    session = Database('https://*****:*****@wazza.cloudant.com/session/')
    for user in session:
        if (user == username):
            sessionDoc = session[user]
            sessionDate = string.split(sessionDoc['timestamp'], ' ')
            currentDate = strftime("%Y-%m-%d")
            if not (sessionDate[0] == currentDate):
                return True
            else:
                return False
    else:
        return True
Example #32
def lookup(number):
    docs = Database(
        "https://*****:*****@wazza.cloudant.com/docs"
    )
    look = []
    result = ""
    try:
        record = docs[number]
        result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <name value = "' + record.get(
            'name') + '"></name> \n <number value = "' + number + '"></number>'
        return HttpResponse(result)
    except ResourceNotFound:
        api_key = '1ae485ea2b55d91d888138ae624063e4'
        base_url_whiteP = 'http://api.whitepages.com/reverse_phone/1.0/?phone='
        myopener = MyOpener()
        content_whiteP = myopener.open(base_url_whiteP + number + ';api_key=' +
                                       api_key).read()

        fName = re.search('wp:firstname>.*?<', content_whiteP)
        lName = re.search('wp:lastname>.*?<', content_whiteP)
        error = re.search('wp:errormessages', content_whiteP)

        if fName:
            look.append('Found')
            look.append(fName.group(0)[13:len(fName.group(0)) - 1])
            look.append(lName.group(0)[12:len(lName.group(0)) - 1])

        elif error:
            look.append('Error')

        else:
            look.append('NotFound')

        #XML Outputs
        if (look[0] == 'Found'):
            #Found
            result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <name value = "' + look[
                1] + ' ' + look[
                    2] + '"></name> \n <number value = "' + number + '"></number>'
            return HttpResponse(result)

        elif (look[0] == 'Error'):
            #Error
            result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Incorrect Number"></error>'
            return HttpResponse(result)

        else:
            #Not Found
            result = result + '<?xml version="1.0" Name="WS Project 2C API"?> \n <error value = "Number not found"></error>'
            return HttpResponse(result)
Example #33
def look(request, id, username):
    #Connect to DB
    docs = Database("https://*****:*****@wazza.cloudant.com/" + username +
                    "/")
    session = checkSession(username)
    if session == False:
        return search(docs, id, username)
    else:

        return HttpResponse(
            '<?xml version="1.0" Name="WS Project 2C"?> \n <error value = "Session Expired"></error>'
        )

    #Perform lookup
    return search(docs, id, username)
Example #34
def processEntries(db: couchdb.Database, sleepHourList: list):
    totalSentences = getTotalSentences(
        db, 'sentences/sentences_not_processed_count')
    sentenceCount = 1
    for entry in db.iterview('sentences/sentences_not_processed', 100):
        waitWhileSleepHour(sleepHourList)
        if sentenceCount % 1000 == 0:
            printProgress(sentenceCount, totalSentences)
        wordSet = {word for word in entry.value["word_list"] if word != ""}
        for word in wordSet:
            updateWordDocument(db, word, entry.value)
        setSentenceAsVisited(db, entry.id)
        sentenceCount = sentenceCount + 1
    if sentenceCount > 1:
        print("")  # to clear printProgress
Example #35
    def __init__(self, config):
        super(EdgeDataBridge, self).__init__()
        self.config = config
        self.api_host = self.config_get('tenders_api_server')
        self.api_version = self.config_get('tenders_api_version')
        self.retrievers_params = self.config_get('retrievers_params')

        self.client = TendersClient(host_url=self.api_host,
            api_version=self.api_version, key=''
        )

        self.couch_url = urljoin(
            self.config_get('couch_url'),
            self.config_get('public_db')
        )
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
Example #36
    def __init__(self, config):
        super(AuctionsDataBridge, self).__init__()
        self.config = config

        self.tenders_url = urljoin(
            self.config_get('tenders_api_server'),
            '/api/{}/tenders'.format(
                self.config_get('tenders_api_version')
            )
        )
        self.tz = tzlocal()
        self.couch_url = urljoin(
            self.config_get('couch_url'),
            self.config_get('auctions_db')
        )
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
        self.url = self.tenders_url
Example #37
 def __init__(self, auction_doc_id,
              worker_defaults={},
              auction_data={}):
     super(Auction, self).__init__()
     self.auction_doc_id = auction_doc_id
     self.tender_url = urljoin(
         worker_defaults["TENDERS_API_URL"],
         '/api/{0}/tenders/{1}'.format(
             worker_defaults["TENDERS_API_VERSION"], auction_doc_id
         )
     )
     if auction_data:
         self.debug = True
         logger.setLevel(logging.DEBUG)
         self._auction_data = auction_data
     else:
         self.debug = False
     self._end_auction_event = Event()
     self.bids_actions = BoundedSemaphore()
     self.worker_defaults = worker_defaults
     self._bids_data = {}
     self.db = Database(str(self.worker_defaults["COUCH_DATABASE"]),
                        session=Session(retry_delays=range(10)))
     self.retries = 10
Example #38
from requests import get
from couchdb import Database
db = Database("http://localhost:5985/lr-data")

split_on = ", supported by"

page = 0

url = "http://12.109.40.31/search?terms=grade&page={0}"

data = get(url.format(page)).json()


while len(data) > 0:
    for item in data:
        if item['publisher'] is not None and split_on in item['publisher']:
            parts = [x.strip() for x in item['publisher'].split(split_on)]
            if parts[0] == parts[1]:
                print(parts)
                doc = db[item['_id']]
                item['publisher'] = parts[0]
                doc.update(item)
                print(db.save(doc))
    page += 1
    data = get(url.format(page)).json()
Example #39
 def __init__(self, mountpoint, uri=None, *args, **kwargs):
     fuse.Fuse.__init__(self, *args, **kwargs)
     db_uri, doc_id = uri.rsplit('/', 1)
     self.doc_id = unquote(doc_id)
     self.db = Database(db_uri)
Example #40
class CouchFSDocument(fuse.Fuse):
    def __init__(self, mountpoint, uri=None, *args, **kwargs):
        fuse.Fuse.__init__(self, *args, **kwargs)
        db_uri, doc_id = uri.rsplit('/', 1)
        self.doc_id = unquote(doc_id)
        self.db = Database(db_uri)

    def get_dirs(self):
        dirs = {}
        attachments = self.db[self.doc_id].get('_attachments', {}).keys()
        for att in attachments:
            parents = [u'']
            for name in att.split('/'):
                filenames = dirs.setdefault(u'/'.join(parents[1:]), set())
                if name != COUCHFS_DIRECTORY_PLACEHOLDER:
                    filenames.add(name)
                    parents.append(name)
        return dirs

    def readdir(self, path, offset):
        path = _normalize_path(path)
        for r in '.', '..':
            yield fuse.Direntry(r)
        for name in self.get_dirs().get(path, []):
            yield fuse.Direntry(name.encode('utf-8'))

    def getattr(self, path):
        path = _normalize_path(path)
        try:
            st = CouchStat()
            if path == '' or path in self.get_dirs().keys():
                st.st_mode = stat.S_IFDIR | 0775
                st.st_nlink = 2
            else:
                att = self.db[self.doc_id].get('_attachments', {})
                data = att[path]
                st.st_mode = stat.S_IFREG | 0664
                st.st_nlink = 1
                st.st_size = data['length']
            return st
        except (KeyError, ResourceNotFound):
            return -errno.ENOENT

    def open(self, path, flags):
        path = _normalize_path(path)
        try:
            #data = self.db.get_attachment(self.db[self.doc_id], path.split('/')[-1])
            #att = self.db[self.doc_id].get('_attachments', {})
            #data = att[path.split('/')[-1]]
            parts = path.rsplit(u'/', 1)
            if len(parts) == 1:
                dirname, filename = u'', parts[0]
            else:
                dirname, filename = parts
            if filename in self.get_dirs()[dirname]:
                return 0
            return -errno.ENOENT
        except (KeyError, ResourceNotFound):
            return -errno.ENOENT
        #accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        #if (flags & accmode) != os.O_RDONLY:
        #    return -errno.EACCES

    def read(self, path, size, offset):
        path = _normalize_path(path)
        try:
            data = self.db.get_attachment(self.db[self.doc_id], path)
            slen = len(data)
            if offset < slen:
                if offset + size > slen:
                    size = slen - offset
                buf = data[offset:offset+size]
            else:
                buf = ''
            return buf
        except (KeyError, ResourceNotFound):
            pass
        return -errno.ENOENT

    def write(self, path, buf, offset):
        path = _normalize_path(path)
        try:
            data = self.db.get_attachment(self.db[self.doc_id], path)
            data = data[0:offset] + buf + data[offset+len(buf):]
            self.db.put_attachment(self.db[self.doc_id], data, filename=path)
            return len(buf)
        except (KeyError, ResourceNotFound):
            pass
        return -errno.ENOENT

    def mknod(self, path, mode, dev):
        path = _normalize_path(path)
        self.db.put_attachment(self.db[self.doc_id], u'', filename=path)

    def unlink(self, path):
        path = _normalize_path(path)
        parts = path.rsplit(u'/', 1)
        if len(parts) == 1:
            dirname, filename = u'', parts[0]
        else:
            dirname, filename = parts
        self.db.delete_attachment(self.db[self.doc_id], path)
        if filename != COUCHFS_DIRECTORY_PLACEHOLDER and len(self.get_dirs().get(dirname, [])) == 0:
            print "putting to:", u'%s/%s' % (dirname, COUCHFS_DIRECTORY_PLACEHOLDER)
            self.db.put_attachment(self.db[self.doc_id], u'', filename=u'%s/%s' % (dirname, COUCHFS_DIRECTORY_PLACEHOLDER))

    def truncate(self, path, size):
        path = _normalize_path(path)
        self.db.put_attachment(self.db[self.doc_id], u'', filename=path)
        return 0

    def utime(self, path, times):
        return 0

    def mkdir(self, path, mode):
        path = _normalize_path(path)
        self.db.put_attachment(self.db[self.doc_id], u'', filename=u'%s/%s' % (path, COUCHFS_DIRECTORY_PLACEHOLDER))
        return 0

    def rmdir(self, path):
        path = _normalize_path(path)
        self.db.delete_attachment(self.db[self.doc_id], u'%s/%s' % (path, COUCHFS_DIRECTORY_PLACEHOLDER))
        return 0

    def rename(self, pathfrom, pathto):
        pathfrom, pathto = _normalize_path(pathfrom), _normalize_path(pathto)
        data = self.db.get_attachment(self.db[self.doc_id], pathfrom)
        self.db.put_attachment(self.db[self.doc_id], data, filename=pathto)
        self.db.delete_attachment(self.db[self.doc_id], pathfrom)
        return 0

    def fsync(self, path, isfsyncfile):
        return 0

    def statfs(self):
        """
        Should return a tuple with the following 6 elements:
            - blocksize - size of file blocks, in bytes
            - totalblocks - total number of blocks in the filesystem
            - freeblocks - number of free blocks
            - availblocks - number of blocks available to non-superuser
            - totalfiles - total number of file inodes
            - freefiles - number of free file inodes
    
        Feel free to set any of the above values to 0, which tells
        the kernel that the info is not available.
        """
        st = fuse.StatVfs()
        block_size = 1024
        blocks = 1024 * 1024
        blocks_free = blocks
        blocks_avail = blocks_free
        files = 0
        files_free = 0
        st.f_bsize = block_size
        st.f_frsize = block_size
        st.f_blocks = blocks
        st.f_bfree = blocks_free
        st.f_bavail = blocks_avail
        st.f_files = files
        st.f_ffree = files_free
        return st
Example #41
from json import load
from couchdb import Database
db = Database("http://localhost:5985/standards")
children = load(open("D10003FC.json"))
data = {"_id": "english", "children": children, "title": "english", "description": "english"}
db.save(data)
children = load(open("D100011F.json"))
data = {"_id": "math", "children": children, "title": "math", "description": "math"}
db.save(data)
Example #42
#!/usr/bin/python
from setup_config import setup_config
from couchdb import Database
from pylons import config
from troppotardi.model import Email
from troppotardi.lib.utils import send_email

if __name__ == '__main__':
    setup_config()

    # Set up the database
    db = Database(config['couchdb_uri'])
    emails = Email.by_time(db)


    for email in emails:
        send_email(body=email.text,
                   subject=email.subject,
                   recipients=email.recipients,
                   sender=email.sender)
        db.delete(email)
Example #43
#!/usr/bin/python
from setup_config import setup_config
from pylons import config
from couchdb import Database

if __name__ == '__main__':
    setup_config()

    db = Database(config['couchdb_uri'])
    db.compact()
    db.compact('galleries')
Example #44
#!/usr/bin/python
from setup_config import setup_config
from pylons import config
from couchdb import Database

if __name__ == '__main__':
    setup_config()

    db = Database(config['couchdb_uri'])
    db.compact()
    db.compact('images')
    db.compact('users')
    db.compact('emails')

Example #45
class AuctionsDataBridge(object):

    """AuctionsDataBridge"""

    def __init__(self, config):
        super(AuctionsDataBridge, self).__init__()
        self.config = config

        self.tenders_url = urljoin(
            self.config_get('tenders_api_server'),
            '/api/{}/tenders'.format(
                self.config_get('tenders_api_version')
            )
        )
        self.tz = tzlocal()
        self.couch_url = urljoin(
            self.config_get('couch_url'),
            self.config_get('auctions_db')
        )
        self.db = Database(self.couch_url,
                           session=Session(retry_delays=range(10)))
        self.url = self.tenders_url

    def config_get(self, name):
        return self.config.get('main').get(name)

    def tender_url(self, tender_id):
        return urljoin(self.tenders_url, 'tenders/{}/auction'.format(tender_id))

    def get_teders_list(self, re_planning=False):
        while True:
            params = {'offset': self.offset,
                      'opt_fields': 'status,auctionPeriod',
                      'mode': '_all_'}
            request_id = generate_request_id(prefix=b'data-bridge-req-')
            logger.debug('Start request to {}, params: {}'.format(
                self.url, params),
                extra={"JOURNAL_REQUEST_ID": request_id})

            response = requests.get(self.url, params=params,
                                    headers={'content-type': 'application/json',
                                             'X-Client-Request-ID': request_id})

            logger.debug('Request response: {}'.format(response.status_code))
            if response.ok:
                response_json = response.json()
                if len(response_json['data']) == 0:
                    logger.info("Change offset date to {}".format(response_json['next_page']['offset']),
                                extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                    self.offset = response_json['next_page']['offset']
                    break
                for item in response_json['data']:
                    if 'auctionPeriod' in item \
                            and 'startDate' in item['auctionPeriod'] \
                            and 'endDate' not in item['auctionPeriod'] \
                            and item['status'] == "active.auction":

                        start_date = iso8601.parse_date(item['auctionPeriod']['startDate'])
                        start_date = start_date.astimezone(self.tz)
                        auctions_start_in_date = startDate_view(
                            self.db,
                            key=(mktime(start_date.timetuple()) + start_date.microsecond / 1E6) * 1000
                        )
                        if datetime.now(self.tz) > start_date:
                            logger.info("Tender {} start date in past. Skip it for planning".format(item['id']),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            continue
                        if re_planning and item['id'] in self.tenders_ids_list:
                            logger.info("Tender {} already planned while replanning".format(item['id']),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            continue
                        elif not re_planning and [row.id for row in auctions_start_in_date.rows if row.id == item['id']]:
                            logger.info("Tender {} already planned on same date".format(item['id']),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            continue
                        yield item

                    if item['status'] == "cancelled":
                        future_auctions = endDate_view(
                            self.db, startkey=time() * 1000
                        )
                        if item["id"] in [i.id for i in future_auctions]:
                            logger.info("Tender {} canceled".format(item["id"]),
                                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                            auction_document = self.db[item["id"]]
                            auction_document["current_stage"] = -100
                            auction_document["endDate"] = datetime.now(self.tz).isoformat()
                            self.db.save(auction_document)
                            logger.info("Change auction {} status to 'canceled'".format(item["id"]),
                                        extra={"JOURNAL_REQUEST_ID": request_id,
                                               'MESSAGE_ID': DATA_BRIDGE_PLANNING})

                logger.info(
                    "Change offset date to {}".format(response_json['next_page']['offset']),
                    extra={"JOURNAL_REQUEST_ID": request_id,
                           'MESSAGE_ID': DATA_BRIDGE_PLANNING}
                )
                self.offset = response_json['next_page']['offset']
            else:
                sleep(10)

    def start_auction_worker(self, tender_item):
        result = do_until_success(
            check_output,
            args=([self.config_get('auction_worker'),
                   'planning', str(tender_item['id']),
                   self.config_get('auction_worker_config')],),
        )
        logger.info("Auction planning command result: {}".format(result),
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING_PROCESS})

    def planning_with_couch(self):
        logger.info('Start Auctions Bridge with feed to couchdb',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        self.planned_tenders = {}
        self.last_seq_id = 0
        while True:
            do_until_success(self.handle_continuous_feed)

    def handle_continuous_feed(self):
        change = self.db.changes(feed='continuous', filter="auctions/by_startDate",
                                 since=self.last_seq_id, include_docs=True)
        for tender_item in change:
            if 'id' in tender_item:
                start_date = tender_item['doc']['stages'][0]['start']
                if tender_item['doc'].get("current_stage", "") == -100:
                    continue

                if tender_item['doc'].get("mode", "") == "test":
                    logger.info('Skipped test auction {}'.format(tender_item['id']),
                                extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                    continue

                if tender_item['id'] in self.planned_tenders and \
                        self.planned_tenders[tender_item['id']] == start_date:
                    logger.debug('Tender {} filtered'.format(tender_item['id']))
                    continue
                logger.info('Tender {} selected for planning'.format(tender_item['id']),
                            extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
                self.start_auction_worker(tender_item)
                self.planned_tenders[tender_item['id']] = start_date
            elif 'last_seq' in tender_item:
                self.last_seq_id = tender_item['last_seq']

        logger.info('Resume data sync...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})

    def run(self):
        logger.info('Start Auctions Bridge',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        self.offset = ''
        logger.info('Start data sync...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
        while True:
            for tender_item in self.get_teders_list():
                logger.debug('Tender {} selected for planning'.format(tender_item))
                self.start_auction_worker(tender_item)
                sleep(2)
            logger.info('Sleep...',
                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})
            sleep(100)
            logger.info('Resume data sync...',
                        extra={'MESSAGE_ID': DATA_BRIDGE_PLANNING})

    def run_re_planning(self):
        self.re_planning = True
        self.tenders_ids_list = []
        self.offset = ''
        logger.info('Start Auctions Bridge for re-planning...',
                    extra={'MESSAGE_ID': DATA_BRIDGE_RE_PLANNING})
        for tender_item in self.get_teders_list(re_planning=True):
            logger.debug('Tender {} selected for re-planning'.format(tender_item))
            self.start_auction_worker(tender_item)
            self.tenders_ids_list.append(tender_item['id'])
            sleep(1)
        logger.info("Re-planning auctions finished",
                    extra={'MESSAGE_ID': DATA_BRIDGE_RE_PLANNING})