Example #1
def get_data_from_schema():
    """

    :return:
    """
    log.info("Starting to export data from db")
    convert_schema = zip(export_models_pg, export_models_ac)
    ac_session = session()
    for export_model, import_model in convert_schema:
        log.info("Starting %s export", export_model.__name__)
        raw_table_lines = export_pg_model_to_list_of_str(export_model)
        prepared_lines = [
            line.strip().split(SEPARATOR) for line in raw_table_lines
        ]
        objects = [
            import_model(**import_model.import_(line))
            for line in prepared_lines
        ]
        log.debug("The total number of objects for adding: %s", len(objects))
        ac_session.bulk_save_objects(objects)
        ac_session.commit()
        log.info("Ending %s export - %s object(s)", export_model.__name__,
                 len(objects))
    ac_session.close()
    log.info("Ending db export\n")
Example #2
    def make_moves(self, moves, opponent):
        """
        Make a coin move.

        Takes in a list of move strings of the form: "<Coin ID>_<Die Roll>"
        eg: "R0_1" will move Coin 0 of Player Red 1 position ahead.

        Since these moves will be read from the client,
        they are assumed to be valid.
        """
        # No move to play
        if "NA" in moves:
            return

        for move in moves:
            log.debug("Making Move: %s" % move)

            move_coin_name, die = move.split('_')
            coin_to_move = self.coins[move_coin_name]

            # Even if you open with 6, you still move 1 step
            if coin_to_move.rel_pos == 0 and die == '6':
                die = '1'

            # Move my coin
            coin_to_move += int(die)

            # If my coin killed someone then place them back in their yards
            for coin in opponent.coins.values():
                if (coin.abs_pos == coin_to_move.abs_pos
                        and not Board.is_safe(coin.rel_pos)):
                    coin.rel_pos = 0
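A minimal standalone sketch (the move value is illustrative, not taken from the game code) of how a move string in the "<Coin ID>_<Die Roll>" format described in the docstring above is split before the coin is advanced:

move = "R0_6"
coin_name, die = move.split('_')
print(coin_name, int(die))  # R0 6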
Example #3
async def update_database(db, filename):
    """Create indexes and import feeds"""

    entries = db.entries
    await db.entries.create_index([("date", DESCENDING)])
    await db.entries.create_index([("url", ASCENDING)])
    feeds = db.feeds
    await db.feeds.create_index([("url", ASCENDING)])
    # TODO: turn this into a bulk upsert
    for feed in feeds_from_opml(filename):
        if not await feeds.find_one({'url': feed['url']}):
            log.debug("Inserting %s" % feed)
            feed = dict(
                feed, **{
                    '_id': safe_id(feed['url']),
                    'created': datetime.now(),
                    'last_fetched': datetime(1970, 1, 1)
                })
            try:
                await feeds.insert_one(feed)
            except DuplicateKeyError as e:
                log.debug(e)
    redis = await connect_redis()
    feed_count = await db.feeds.count_documents({})
    await redis.hset(REDIS_NAMESPACE + 'status', 'feed_count', feed_count)
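The TODO in update_database above asks for a bulk upsert. Below is a hedged sketch of one way to do it with pymongo's UpdateOne operations passed to Motor's bulk_write; the helper names feeds_from_opml and safe_id are assumed to come from the surrounding module.

from datetime import datetime

from pymongo import UpdateOne


async def bulk_upsert_feeds(feeds, filename):
    """Upsert every feed from an OPML file in a single round trip."""
    ops = [
        UpdateOne({'url': feed['url']},
                  {'$setOnInsert': dict(feed, **{
                      '_id': safe_id(feed['url']),
                      'created': datetime.now(),
                      'last_fetched': datetime(1970, 1, 1),
                  })},
                  upsert=True) for feed in feeds_from_opml(filename)
    ]
    if ops:
        await feeds.bulk_write(ops, ordered=False)
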
async def crawler(client, url_queue, archive):
    while True:
        url = await url_queue.get()
        try:
            log.debug("Crawling url: {}".format(url))
            headers = ACCEPT_HEADERS
            headers['Referer'] = archive['top']
            response = await client.get(url, headers=headers)
            if response.status != 200:
                raise Exception(
                    "got response code other than 200 for url: {}".format(url))
            else:
                data = await response.read()
                content_type, params = parse_header(
                    response.headers['content-type'])
                if CHANGE_DOMAIN_FROM and CHANGE_DOMAIN_TO:
                    wrUrl = url.replace(CHANGE_DOMAIN_FROM, CHANGE_DOMAIN_TO)
                else:
                    wrUrl = url
                item = {
                    "WebResourceData": data,
                    "WebResourceMIMEType": content_type,
                    "WebResourceURL": wrUrl
                }
                if 'charset' in params:
                    item['WebResourceTextEncodingName'] = params['charset']
                archive['items'].append(item)
        except Exception as exc:
            log.warn('Exception {}:'.format(exc), exc_info=True)

        finally:
            url_queue.task_done()
Example #5
def drop_backend(conn, suffix, bename=None, maxnum=50):
    if not bename:
        bename = [x.dn for x in conn.getBackendsForSuffix(suffix)]
    
    if not bename:
        return None
        
    assert bename, "Missing bename for %r" % suffix
    if not hasattr(bename, '__iter__'):
        bename = [','.join(['cn=%s' % bename, lib389.DN_LDBM])]
    for be in bename:
        log.debug("removing entry from %r" % be)
        leaves = [x.dn for x in conn.search_s(
            be, ldap.SCOPE_SUBTREE, '(objectclass=*)', ['cn'])]
        # start deleting the leaves - which have the max number of ","
        leaves.sort(key=lambda x: x.count(","))
        while leaves and maxnum:
            # to avoid infinite loops
            # limit the iterations
            maxnum -= 1
            try:
                log.debug("removing %s" % leaves[-1])
                conn.delete_s(leaves[-1])
                leaves.pop()
            except ldap.LDAPError:
                leaves.insert(0, leaves.pop())

        if not maxnum:
            raise Exception("BAD")
Example #6
    def __readLine(self, file_fullname, encoding='ascii'):
        with open(file_fullname, 'rb') as fp:
            size = os.path.getsize(file_fullname)
            if size > self.__MaxSize:  # files of tens of GB with a huge first line have exhausted memory before
                fp.seek(-self.__MaxSize, 2)
            for lines, line_binary in enumerate(fp):
                if lines < self.__StartLine:
                    continue
                if lines > self.__EndLine:
                    break

                try:
                    line = line_binary.decode(encoding=encoding)
                    log.debug(str(lines) + ' ' + line)
                    if self.__RegularExpFrom != '':
                        line = re.sub(self.__RegularExpFrom,
                                      self.__RegularExpTo, line)
                    yield line
                except UnicodeDecodeError:
                    encoding = detect(line_binary[:self.__CodecCheckSize])[
                        'encoding']  # a single 10 MB line once kept this function from returning
                    if encoding is None:
                        raise  # encoding could not be detected; propagate the exception to the caller
                    line = line_binary.decode(encoding=encoding)
                    if self.__RegularExpFrom != '':
                        line = re.sub(self.__RegularExpFrom,
                                      self.__RegularExpTo, line)
                    yield line
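A standalone sketch (file name and size cap are illustrative) of the tail-read trick used above: when the file is larger than the cap, seek back from the end (whence=2, i.e. os.SEEK_END) so that a huge first line cannot exhaust memory.

import os

MAX_SIZE = 1024 * 1024  # read at most the last 1 MiB

with open("big.log", "rb") as fp:
    if os.path.getsize("big.log") > MAX_SIZE:
        fp.seek(-MAX_SIZE, os.SEEK_END)
    for line_binary in fp:
        pass  # decode and filter each line as __readLine does above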
Example #7
    def load(self):
        """ load entity data from storage and merge vars in self """
        try:
            entity = self._tableservice.get_entity(self._tablename,
                                                   self._PartitionKey,
                                                   self._RowKey)
            self._existsinstorage = True
            """ sync with entity values """
            for key, default in vars(self.__class__).items():
                if not key.startswith('_') and key != '':
                    if isinstance(default, StorageTableCollection):
                        collection = getattr(self, key)
                        collection.setfilter()
                        collection.load()
                    else:
                        value = getattr(entity, key, None)
                        if value is not None:
                            oldvalue = getattr(self, key, default)
                            if oldvalue == default:
                                setattr(self, key, value)

        except AzureMissingResourceHttpError as e:
            log.debug(
                'can not get table entity:  Table {}, PartitionKey {}, RowKey {} because {!s}'
                .format(self._tablename, self._PartitionKey, self._RowKey, e))
            self._existsinstorage = False
Example #8
    def table_isempty(self, tablename, PartitionKey='', RowKey='') -> bool:
        if (tablename in self.tables) and (self.tableservice is not None):

            filter = "PartitionKey eq '{}'".format(
                PartitionKey) if PartitionKey != '' else ''
            if filter == '':
                filter = "RowKey eq '{}'".format(
                    RowKey) if RowKey != '' else ''
            else:
                filter = filter + ("and RowKey eq '{}'".format(RowKey)
                                   if RowKey != '' else '')
            try:
                entities = list(
                    self.tableservice.query_entities(tablename,
                                                     filter=filter,
                                                     select='PartitionKey',
                                                     num_results=1))
                if len(entities) == 1:
                    return False
                else:
                    return True

            except AzureMissingResourceHttpError as e:
                log.debug('failed to query {} with error {}'.format(
                    tablename, e))
                return True

        else:
            return True
        pass
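A standalone illustration (hypothetical key values) of the OData filter string the method above builds when both keys are supplied:

PartitionKey, RowKey = 'p1', 'r1'  # hypothetical values
filter = "PartitionKey eq '{}'".format(PartitionKey)
filter = filter + (" and RowKey eq '{}'".format(RowKey) if RowKey != '' else '')
print(filter)  # PartitionKey eq 'p1' and RowKey eq 'r1'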
Example #9
    def _sync(self, tag, force):

        repo_name = self.config['repo_name']
        sync_script = '{0}/{1}.sync'.format(self.config["sync_dir"], repo_name)

        if os.path.exists(sync_script):
            log.info('{0} :: Calling sync script at {1}'.format(__name__,
                                                                sync_script))
            proc = subprocess.Popen([sync_script,
                                     '--repo="{0}"'.format(repo_name),
                                     '--tag="{0}"'.format(tag),
                                     '--force="{0}"'.format(force)])
            proc_out = proc.communicate()[0]
            log.info(proc_out)

            if proc.returncode != 0:
                exit_code = 40
                log.error("{0} :: {1}".format(__name__, exit_codes[exit_code]))
                return exit_code
        else:
            # In absence of a sync script -- Tag the repo
            log.debug(__name__ + ' :: Calling default sync.')

            try:
                self._dulwich_tag(tag, self._make_author())
            except Exception as e:
                log.error(str(e))
                raise SartorisError(message=exit_codes[12], exit_code=12)

            self._default_sync()

        self._remove_lock()
        return 0
Example #10
 def load(self):
     if self._tableservice is not None:
         log.debug('query table {} filter by {}'.format(
             self._tablename, self._filter))
         self._entities = self._tableservice.query_entities(
             self._tablename, self._filter)
         self._count = len(list(self._entities))
Example #11
def sample_words():
    text_dir = '../data/train/txt_train'
    count = set([
        '',
    ])
    for txtname in os.listdir(text_dir):
        with open(text_dir + '/' + txtname, 'r', encoding="utf-8") as f:
            for line in f.readlines():
                text = line.split(',')[-1].strip()
                if text == '###':
                    continue
                textSet = set(text)
                count.update(textSet)
                log.debug('textSet now: %s' % (textSet, ))
    log.info('%s char found.' % (len(count), ))

    # for txtname in os.listdir(text_dir):
    #     with open(os.path.join(text_dir, txtname), 'r', encoding="utf-8") as f:
    #         for line in f.readlines():
    #             text = line.split(',')[-1].strip()
    #             textSet = set(text)
    #             for c in textSet:
    #                 assert c in count

    for character in count:
        log.debug(character)
    with open(WORDDICT, 'w', encoding='utf-8') as f:
        for character in count:
            f.write(character)
            f.write('\n')
Example #12
def main():
    log.info('otrrentworker start main in {} environment....'.format(
        config['APPLICATION_ENVIRONMENT']))
    """ initiate transmission-deamon """
    daemonstarted = True
    if not config['APPLICATION_ENVIRONMENT'] in ['Development']:
        daemonstarted = start_transmission()

    if daemonstarted:
        """ schedule workers """
        if config['APPLICATION_ENVIRONMENT'] == 'Development':
            schedule.every(1).minutes.do(runetl, config, log)
            """ log configuration in debug mode """
            for key, value in config.items():
                log.debug('otrrentworker configuration: {} = {!s}'.format(
                    key, value))

        elif config['APPLICATION_ENVIRONMENT'] == 'Test':
            schedule.every(1).minutes.do(runworker, config, log)
            schedule.every(1).hours.do(runetl, config, log)

        else:
            schedule.every(5).minutes.do(runworker, config, log)
            schedule.every().day.at("12:00").do(runetl, config, log)
            schedule.every().day.at("00:00").do(runetl, config, log)
        """ run until stopsignal """
        while not stopsignal:
            schedule.run_pending()
            time.sleep(1)
    """ goodby """
    log.info('otrrentworker service terminated. Goodby!')
Example #13
def db_link_authors(words: List[List[str]]) -> None:
    """
    Create relations between words and their authors (WID <-> AID) in DB
    These connections locate in 't_connect_authors' table

    :param words: List of words' data received from a text file
        using function convert_file_to_list
    :return: None
    """
    log.info("Start to link words with their authors")

    all_authors = Author.query.all()
    author_by_abbr = {author.abbreviation: author for author in all_authors}

    # Get a dictionary with a list of abbreviations of authors of each word by id_old
    dict_of_authors_data_as_dict = {
        int(word_data[0]): word_data[5].split(" ", 1)[0].split("/")
        for word_data in words
    }

    for word in Word.query.all():
        authors_abbreviations = dict_of_authors_data_as_dict[word.id_old]
        word.add_authors([
            author_by_abbr[abbreviation]
            for abbreviation in authors_abbreviations
        ])
        log_text = f"{word.name} {' '*(26-len(word.name))}-> {'/'.join(authors_abbreviations)}"
        log.debug(log_text)
    db.session.commit()
    log.info("Finish to link words with their authors")
Example #14
def drop_backend(conn, suffix, bename=None, maxnum=50):
    if not bename:
        bename = [x.dn for x in conn.getBackendsForSuffix(suffix)]

    if not bename:
        return None

    assert bename, "Missing bename for %r" % suffix
    if not hasattr(bename, '__iter__'):
        bename = [','.join(['cn=%s' % bename, dsadmin.DN_LDBM])]
    for be in bename:
        log.debug("removing entry from %r" % be)
        leaves = [
            x.dn for x in conn.search_s(be, ldap.SCOPE_SUBTREE,
                                        '(objectclass=*)', ['cn'])
        ]
        # start deleting the leaves - which have the max number of ","
        leaves.sort(key=lambda x: x.count(","))
        while leaves and maxnum:
            # to avoid infinite loops
            # limit the iterations
            maxnum -= 1
            try:
                log.debug("removing %s" % leaves[-1])
                conn.delete_s(leaves[-1])
                leaves.pop()
            except ldap.LDAPError:
                leaves.insert(0, leaves.pop())

        if not maxnum:
            raise Exception("BAD")
Example #15
    def get_tarred_file(self):
        tar_file_link, intern_path = self.file_link.split("//")
        log.debug("Get file {} from tar.gz archive {}".format(intern_path,
                                                              tar_file_link))
        tar_base_name = os.path.basename(tar_file_link)
        tar_prefix = tar_base_name.split(".")[0]
        tar_root_dir = os.path.join(self.job_dir, tar_prefix)
        self.file_path = os.path.join(tar_root_dir, intern_path)

        if os.path.exists(self.file_path + ".gz"):
            log.debug("File {} is already downloaded".format(
                self.file_path + ".gz"))
            return self.file_path + ".gz"
        if not os.path.exists(tar_root_dir):
            os.makedirs(tar_root_dir)
        tar_file_path = os.path.join(self.job_dir, tar_base_name)
        if not os.path.exists(tar_file_path):
            web = Web(url=self.file_url)
            req = web.get()
            if req.status_code != 200:
                return None
            else:
                with open(tar_file_path, "wb") as f:
                    f.write(req.content)
        if self._extract(tar_file_path, tar_root_dir, intern_path):
            with open(self.file_path, 'rb') as f:
                with gzip.open(self.file_path + ".gz", 'wb') as zipped_file:
                    zipped_file.writelines(f)
            os.remove(self.file_path)
            self.file_path += ".gz"
            return self.file_path
        else:
            return None
Example #16
    def post(self):
        """ parse request data and use model attribute info"""
        data = request.json
        for key, value in login.items():
            if key in data:
                data[value.attribute] = data.pop(key)
                #log.debug('{!s}: {!s}'.format(value.attribute, data[value.attribute]))
        """ retrieve user info """
        if 'PartitionKey' in data:
            if data['PartitionKey'] == "":
                data['PartitionKey'] = config['APPLICATION_CLIENT_ID']
        else:
            data['PartitionKey'] = config['APPLICATION_CLIENT_ID']

        loginuser = db.get(User(**data))
        """ user exists ? Create a new and  """
        if not db.exists(loginuser):
            loginuser.created = datetime.now()
            db.insert(loginuser)
        """ login user """
        #log.debug(loginuser.dict())
        g.user = loginuser
        token = generate_auth_token(loginuser)
        session['authtoken'] = token
        """ prepare return dict """
        data['loggedin'] = True
        data['timeout'] = 600

        log.debug(session)

        return data, 200
Example #17
async def validation_exception_handler(_, exc):
    content = str(getattr(exc, 'orig', repr(exc)))
    log.debug(f"SQLAlchemy found an error: {content}")
    return JSONResponse(
        status_code=400,
        content={"detail": "Database Error", "body": content.split('\n')[0]},
    )
Example #18
    def save(self, syncwithstorage=True):
        """ insert or merge self into storage """

        if syncwithstorage:
            """ try to merge entry """
            try:
                self._tableservice.insert_or_merge_entity(
                    self._tablename, self.__image__(entity=True))
                """ sync self """
                self.load()

            except AzureMissingResourceHttpError as e:
                log.error(
                    'can not insert or merge table entity:  Table {}, PartitionKey {}, RowKey {} because {!s}'
                    .format(self._tablename, self._PartitionKey, self._RowKey,
                            e))

        else:
            """ try to replace entry """
            try:
                self._tableservice.insert_or_replace_entity(
                    self._tablename, self.__image__(entity=True))
                self._existsinstorage = True

            except AzureMissingResourceHttpError as e:
                log.debug(
                    'can not insert or replace table entity:  Table {}, PartitionKey {}, RowKey {} because {!s}'
                    .format(self._tablename, self._PartitionKey, self._RowKey,
                            e))
Example #19
    def get(self):
        """ list top recordings  with filters """
        """ retrieve Boards with filters """
        toplist = StorageTableCollection('recordings', "PartitionKey eq 'top'")
        toplist = db.query(toplist)
        toplist.sort(key=lambda item: item.beginn, reverse=True)
        """ apply filters """
        for key, value in request.args.items():
            if key == 'Genre':
                toplist.filter('genre', value)

            elif key == 'Channel':
                toplist.filter('sender', value)

            elif key == 'Sort':
                reverse = safe_cast(request.args.get('Descending', False),
                                    bool)
                field = recording[value].attribute
                log.debug('Sort field = {} with reverse {!s}'.format(
                    field, reverse))
                toplist = sorted(toplist,
                                 key=lambda k: k[field],
                                 reverse=reverse)

            elif key == 'Descending':
                if not request.args.get('Sort', False):
                    api.abort(403, __class__._responses['get'][403])
            else:
                api.abort(403, __class__._responses['get'][403])
        """ abort if no toplist filtered """
        if not toplist:
            api.abort(404, __class__._responses['get'][404])
        """ return list, httpstatus """
        return toplist, 200
Example #20
def build_embedding_weight(dictlen, embedding_dim=EMBEDDING_LENGTH):
    """
    该函数从word2vec 模型中加载 训练好的 embedded vectors.
    :param dictlen:
    :param embedding_dim:
    :return:
    """
    if embedding_dim != EMBEDDING_LENGTH:
        raise TypeError(
            'word embedding length in model conflict with that in word2vec model'
        )

    log.debug(SAVED_WORD2VEC_PATH)
    wordvec = keyedvectors.KeyedVectors.load(SAVED_WORD2VEC_PATH)
    vocabulary = wordvec.index2word
    # note: vocabulary is ordered by descending word frequency
    freq_dict = wordvec.vocab
    weight = np.zeros(shape=(dictlen + 1, embedding_dim), )
    for i in range(dictlen):
        weight[i + 1, :] = wordvec[vocabulary[i]]
        if (i % 1000 == 0):
            print('word %+12s frequency: %+6s' %
                  (vocabulary[i], freq_dict[vocabulary[i]]))
    return [
        weight,
    ]
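A hedged usage sketch: the single-element list returned above matches the weights= argument of a Keras Embedding layer, which is assumed (it is not shown in the source) to be the consumer; row 0 of the matrix stays zero for padding.

from keras.layers import Embedding

dictlen = 50000  # illustrative vocabulary size
# EMBEDDING_LENGTH and build_embedding_weight come from the module above
embedding_layer = Embedding(input_dim=dictlen + 1,
                            output_dim=EMBEDDING_LENGTH,
                            weights=build_embedding_weight(dictlen),
                            trainable=False)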
Example #21
def load_test(filepath, totalsize, maxlen, dictlen):
    wordvec = keyedvectors.KeyedVectors.load(SAVED_WORD2VEC_PATH)
    vocabulary = wordvec.vocab
    log.info('Length of vocabulary: %s' % len(vocabulary))
    index2word = wordvec.index2word
    log.info(
        'The first %d most frequent words selected, word with frequency under %d discarded.'
        % (dictlen, vocabulary[index2word[dictlen]].count))
    assert dictlen == vocabulary[index2word[dictlen]].index

    bigramer = phrases.Phraser.load(SAVED_BIGRAM_PATH)

    sentences = Sentences(filename=filepath, loop=False)
    comments = np.zeros(shape=(totalsize, maxlen), dtype=np.float32)
    ind = np.zeros(shape=(totalsize, 1), dtype=np.int32)
    count = 0
    full = 0
    empty = 0
    for i, sentence in enumerate(sentences):
        if len(sentence) == 0:
            print("Zero length sentences encountered: %s" % (i + 1, ))
            continue

        sentence = bigramer[sentence]

        log.debug('Length of sentence: %+3s' % len(sentence))
        j = 0
        for word in sentence:
            word_info = vocabulary.get(word)
            if word_info:
                word_id = word_info.index
                if word_id < dictlen:
                    comments[count, j] = word_id + 1
                    log.debug(
                        'The %+5s-th wordvector[%+2s] in one-hot representation: %+5s'
                        % (count, j, word_id))
                    j += 1
            else:
                continue
            if j == maxlen:
                full += 1
                break

        if j == 0:
            print("Sentence %s is empty after processing." % (i + 1, ))
            empty += 1
        else:
            # if 1403 < count < 1409:
            #     print(count, sentence, comments[count])
            ind[count] = i
            count += 1
        if count == totalsize:
            break
    log.info('%+5s out of %+5s in total are full while %s are empty.' %
             (full, totalsize, empty))
    # print(comments[1401:1408])
    inds = ind[:count]
    comments = comments[:count]
    return inds, comments
Example #22
def start_transmission():
    try:
        """ create transmission-log """
        if not os.path.exists('/usr/log/transmission.log'):
            call = 'touch /usr/log/transmission.log'
            process = subprocess.run(call,
                                     shell=True,
                                     check=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            call = 'chown -R debian-transmission:debian-transmission /usr/log'
            process = subprocess.run(call,
                                     shell=True,
                                     check=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
        """ update ACL for transmission access """
        call = 'chown -R debian-transmission:debian-transmission ' + config[
            'APPLICATION_PATH_TORRENTS']
        process = subprocess.run(call,
                                 shell=True,
                                 check=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        call = 'chown -R debian-transmission:debian-transmission ' + config[
            'APPLICATION_PATH_OTRKEYS']
        process = subprocess.run(call,
                                 shell=True,
                                 check=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        call = 'chown -R debian-transmission:debian-transmission ' + config[
            'APPLICATION_PATH_VIDEOS']
        process = subprocess.run(call,
                                 shell=True,
                                 check=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        """ restart transmission service """
        call = 'service transmission-daemon start'
        log.debug(call)
        process = subprocess.run(call,
                                 shell=True,
                                 check=True,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
        time.sleep(5)
        log.info('init transmission-daemon finished. Returns {!s}'.format(
            process.stdout.decode(encoding='utf-8')))
        return True

    except subprocess.CalledProcessError as e:
        log.error('init transmission-daemon failed with cmd:{!s} because {!s}'.
                  format(e.cmd, e.stderr))
        return False

    except Exception:
        log.exception('start transmission failed... ')
        return False
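A hedged variant sketch of one of the chown calls above without shell=True, passing an argv list instead of a shell string (assuming the path needs no shell expansion); the original shell form works too, this just sidesteps quoting issues.

import subprocess

subprocess.run(
    ['chown', '-R', 'debian-transmission:debian-transmission', '/usr/log'],
    check=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE)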
Example #23
def get_user_watchlist_ids(db: Session, user_id: int) -> List[int]:
    log.info("Searching watchlist for user: {%s}", user_id)
    watchlist = [] if not user_id else [
        i for i, in db.query(models.Watchlist.movie).filter_by(
            user=user_id).all()
    ]
    log.debug("Retrieved %s elements in user's watchlist", len(watchlist))
    return watchlist
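A standalone illustration (made-up rows) of the "for i, in" unpacking above: querying a single column in SQLAlchemy yields one-element tuples, and the trailing comma unpacks each of them.

rows = [(1,), (7,), (42,)]  # what .all() returns for a single queried column
watchlist = [i for i, in rows]
print(watchlist)  # [1, 7, 42]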
Example #24
def handler(session):
    ''' Playback own Number to Caller '''
    log.debug('Handler for ext 100, Playback own Number to Caller')
    calling_number = session.getVariable('caller_id_number')
    session.answer()
    session.execute('playback', '014_su_numero_es.gsm')
    session.execute('say', 'es number iterated %s' % calling_number)
    session.hangup()
Example #25
 def exe(self, cmd):
     log.debug("Executing cmd by ssh: {cmd}".format(cmd=cmd))
     try:
         stdin, stdout, stderr = self.ssh_cl.exec_command(cmd)
     except paramiko.ssh_exception.SSHException as e:
         log.error("SSH command failed: {}\n{}".format(cmd, e))
         return None, None, None
     return stdin, stdout.read(), stderr.read()
Example #26
async def fetch_one(session, feed, client, database, queue):
    """Fetch a single feed"""
    url = feed['url']
    checksum = feed.get('checksum', None)
    changed = False
    headers = {}

    await publish(queue, 'ui', {'event': 'fetch_one', 'url': url})
    log.debug("Fetching %s", url)

    if 'etag' in feed:
        headers['etag'] = feed['etag']
    if 'last_modified' in feed:
        headers['if-modified-since'] = feed['last_modified']

    try:
        async with session.get(url, headers=headers) as response:
            text = await response.text()
            # TODO: check behavior for 301/302
            update = {
                'last_status': response.status,
                'last_fetched': datetime.now(),
            }
            await publish(queue, 'ui', {
                'event': 'fetch_result',
                'url': url,
                'status': response.status
            })
            if response.status == 200:
                update['raw'] = text
                update['checksum'] = sha1(text.encode('utf-8')).hexdigest()
                # mark as changed when no checksum is stored yet or the new
                # content hash differs from the stored one
                if checksum is None or update['checksum'] != checksum:
                    changed = True

            if 'etag' in response.headers:
                update['etag'] = response.headers['etag']

            if 'last-modified' in response.headers:
                update['last_modified'] = response.headers['last-modified']

            await do_update(database.feeds, {'url': url}, {'$set': update})

            if changed:
                await enqueue(queue, 'parser', {
                    "_id": feed['_id'],
                    "scheduled_at": datetime.now()
                })
            return feed, response.status

    except Exception:
        log.error(format_exc())
        await do_update(
            database.feeds, {'url': url},
            {'$set': {
                'last_status': 0,
                'last_fetched': datetime.now()
            }})
        return feed, 0
Example #27
async def init_connections(sanic, loop):
    """Bind the database and Redis client to Sanic's event loop."""

    global redis, db
    redis = await connect_redis()
    motor = AsyncIOMotorClient(MONGO_SERVER, io_loop=loop)
    db = motor[DATABASE_NAME]
    log.debug("adding task")
    app.add_task(loop.create_task(monitor_loop()))
Example #28
    def loadtorrents(self):
        """ set filter conditions """
        filter = "PartitionKey eq '{!s}'".format(self.RowKey)
        log.debug(filter)
        self._Torrents = StorageTableCollection(self._tableservice,
                                                self._tablename,
                                                filter).list()

        pass
Example #29
    def sync_header_chain(cls, path, bitcoind_server, last_block_id):
        """
        Synchronize our local block headers up to the last block ID given.
        """
        current_block_id = SPVClient.height(path)
        if current_block_id < last_block_id:

            log.debug("Synchronize %s to %s" %
                      (current_block_id, last_block_id))

            # need to sync
            prev_block_header = SPVClient.read_header(path, current_block_id)
            prev_block_hash = prev_block_header['hash']

            # connect
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((bitcoind_server, 8333))
            client = BlockHeaderClient(sock, path)
            client.handshake()

            # request first batch
            client.send_getheaders(prev_block_hash)

            while True:

                # next message
                message_header, message = client.receive_message()

                if not message:
                    continue

                # dispatch message
                handle_func_name = "handle_" + message_header.command
                handle_func = getattr(client, handle_func_name, None)
                if handle_func:
                    handle_func(message_header, message)

                if message_header.command == "headers":
                    # got reply to our getheaders request.
                    # pipe the next one in
                    current_block_id = SPVClient.height(path)
                    prev_block_header = SPVClient.read_header(
                        path, current_block_id)
                    prev_block_hash = prev_block_header['hash']
                    client.send_getheaders(prev_block_hash)

                # synchronized?
                if SPVClient.height(path) >= last_block_id:
                    break

            # verify headers
            rc = SPVClient.verify_header_chain(path)
            if not rc:
                raise Exception("Failed to verify headers (stored in '%s')" %
                                path)

        return True
Example #30
 def f_retry(*args, **kwargs):
     for i in range(attempts):
         try:
             return func(*args, **kwargs)
         except on_exception as e:
             log.debug("retry %d for %s(%s, %s), waiting %d" %
                       (i, func, args, kwargs, interval))
             sleep(interval)
             continue
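The fragment above refers to attempts, interval and on_exception from an enclosing scope. Below is a hedged reconstruction of the decorator factory it most likely lives in; names and defaults are assumptions, not taken from the source.

import functools
import logging
from time import sleep

log = logging.getLogger(__name__)


def retry(attempts=3, interval=5, on_exception=Exception):
    def decorator(func):
        @functools.wraps(func)
        def f_retry(*args, **kwargs):
            for i in range(attempts):
                try:
                    return func(*args, **kwargs)
                except on_exception:
                    log.debug("retry %d for %s(%s, %s), waiting %d",
                              i, func, args, kwargs, interval)
                    sleep(interval)
        return f_retry
    return decorator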
Example #31
def is_word_type(file_path):
    '''Check whether the file is a Word document.'''
    if not os.path.isfile(file_path):
        log.warn('s3_local_file not exists: %s' % file_path)
        return False
    has_doc_tag = file_path.endswith('.doc') or file_path.endswith('.docx')  # determine the type from the file extension
    if has_doc_tag:
        log.debug('file_type:ms_word,file_path:%s' % file_path)
        return True
    return False
Example #32
def determine_particle(word: str, particle: str) -> str:
    if not particle.startswith('('):
        return word + particle

    try:
        ret = tossi.postfix(word, particle)
    except ValueError as e:
        log.exception(e)
        return word + particle
    log.debug(f'tossi: {word} / {particle} -> {ret}')
    return ret
Example #33
async def get_geo_location(address, session):
    try:
        geo_location = GeoLocation.objects.filter(address=address).first()
        if not geo_location:
            google_response = await get_geo_code_by_address_async(
                address, session)
            geo_data = google_response[0].get('geometry').get('location')
            geo_location = __save_geo_location(geo_data, address)
        return geo_location
    except Exception as error:
        log.debug('Error getting geo location data: %s %s', error, scope)
Example #34
    def query(self, storagecollection) -> StorageTableCollection:
        if isinstance(storagecollection, StorageTableCollection):
            try:
                storagecollection.extend(
                    self._tableservice.query_entities(
                        storagecollection._tablename,
                        storagecollection._filter))

            except AzureMissingResourceHttpError as e:
                log.debug(
                    'can not query table {} with filters {} because {!s}'.format(
                        storagecollection._tablename,
                        storagecollection._filter, e))

            return storagecollection
        else:
            return None
Example #35
    def sync_header_chain(cls, path, bitcoind_server, last_block_id ):
        """
        Synchronize our local block headers up to the last block ID given.
        """
        current_block_id = SPVClient.height( path )
        if current_block_id < last_block_id:
           
            log.debug("Synchronize %s to %s" % (current_block_id, last_block_id))

            # need to sync
            prev_block_header = SPVClient.read_header( path, current_block_id )
            prev_block_hash = prev_block_header['hash']

            # connect 
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect( (bitcoind_server, 8333) )
            client = BlockHeaderClient( sock, path )
            client.handshake()

            # request first batch 
            client.send_getheaders( prev_block_hash )

            while True:

                # next message
                message_header, message = client.receive_message()

                if not message:
                    continue

                # dispatch message
                handle_func_name = "handle_" + message_header.command
                handle_func = getattr(client, handle_func_name, None)
                if handle_func:
                    handle_func(message_header, message)
               
                if message_header.command == "headers":
                    # got reply to our getheaders request.
                    # pipe the next one in
                    current_block_id = SPVClient.height( path )
                    prev_block_header = SPVClient.read_header( path, current_block_id )
                    prev_block_hash = prev_block_header['hash']
                    client.send_getheaders( prev_block_hash )

                # synchronized?
                if SPVClient.height( path ) >= last_block_id:
                    break

            # verify headers 
            rc = SPVClient.verify_header_chain( path )
            if not rc:
               raise Exception("Failed to verify headers (stored in '%s')" % path)

        return True
Example #36
    def __new__(cls, *args, **kwargs):
        """ This class is Singleton, return only one instance """
        if not cls.__instance:
            cls.__instance = super(GitDeploy, cls).__new__(cls, *args,
                                                           **kwargs)

            # Call config
            cls.__instance._configure(**kwargs)

            log.debug('{0} :: Config - {1}'.format(__name__,
                      str(cls.__instance.config)))
        return cls.__instance
Example #37
 def __init__(self,
              host, port, user, timeout=None, key=None, key_path=None):
     self.ssh_cl = paramiko.SSHClient()
     self.ssh_cl.set_missing_host_key_policy(paramiko.AutoAddPolicy())
     log.debug("Executing ssh {user}@{host}:{port}".format(
         user=user, host=host, port=port))
     self.ssh_cl.connect(hostname=host,
                         port=port,
                         username=user,
                         timeout=timeout,
                         pkey=key,
                         key_filename=key_path)
Example #38
 def _extract(self, tar, root_dir, file_path):
     log.debug("Extracting file {} from {} in {}".format(
         file_path, tar, root_dir))
     try:
         with contextlib.closing(lzma.LZMAFile(tar)) as xz:
             with tarfile.open(fileobj=xz) as f:
                 f.extract(file_path, path=root_dir)
         return True
     except Exception as e:
         log.error("Error when untarring file {} from {} in {}:{}".format(
             file_path, tar, root_dir, e))
         return False
Example #39
    def handle_headers( self, message_header, block_headers_message ):
        """
        Handle a 'headers' message.
        NOTE: we request headers in order, so we will expect to receive them in order here.
        Verify that we do so.
        """
        block_headers = block_headers_message.headers
        current_height = SPVClient.height( self.path )
        log.debug("Receive %s headers (%s to %s)" % (len(block_headers), current_height, current_height + len(block_headers)))

        serializer = BlockHeaderSerializer()

        # verify that the local header chain connects to this sequence
        last_header = SPVClient.read_header( self.path, current_height )

        if last_header['hash'] != self.hash_to_string(block_headers[0].prev_block):
            raise Exception("Received discontinuous block header '%s' (expected '%s')" % \
                    (self.hash_to_string(block_headers[0].prev_block),
                    last_header['hash'] ))

        # verify that this sequence of headers constitutes a hash chain 
        for i in range(1, len(block_headers)):
            prev_block_hash = self.hash_to_string(block_headers[i].prev_block)
            if prev_block_hash != block_headers[i-1].calculate_hash():
                raise Exception("Block '%s' is not continuous with block '%s'" % \
                        prev_block_hash,
                        block_headers[i-1].calculate_hash())

        # insert into to local headers database
        next_block_id = current_height + 1
        for block_header in block_headers:
            with open(self.path, "r+") as f:

                # omit tx count 
                block_header.txns_count = 0
                bin_data = serializer.serialize( block_header )

                if len(bin_data) != BLOCK_HEADER_SIZE:
                    raise Exception("Block %s (%s) has %s-byte header" % (next_block_id, block_header.calculate_hash(), len(bin_data)))

                # NOTE: the fact that we use seek + write ensures that we can:
                # * restart synchronizing at any point
                # * allow multiple processes to work on the chain safely (even if they're duplicating effort)
                f.seek( BLOCK_HEADER_SIZE * next_block_id, os.SEEK_SET )
                f.write( bin_data )

                next_block_id += 1
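A standalone sketch (toy record size and file name) of the seek-and-write scheme the NOTE above describes: every header occupies a fixed-size slot at offset record_size * index, so writes are idempotent and synchronization can restart at any point.

import os

RECORD_SIZE = 80  # illustrative; the real BLOCK_HEADER_SIZE may differ


def write_record(path, index, data):
    assert len(data) == RECORD_SIZE
    with open(path, "r+b") as f:  # the file must already exist
        f.seek(RECORD_SIZE * index, os.SEEK_SET)
        f.write(data)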
Example #40
    def get(self, ignore404=False):
        """
            Sometimes console.html is gzipped on logs server and console.html
            is not available anymore, so here it silently fails when trying to
            download console.html and then tries to get console.html.gz
            We don't want redundant error messages in console

        :param ignore404: not to show error message if got 404 error
        :return: request obj
        """
        log.debug("GET {url} with ignore404={i}".format(
            url=self.url, i=str(ignore404)))
        req = requests.get(self.url)
        if req.status_code != 200:
            if not (ignore404 and req.status_code == 404):
                log.error("Page {url} got status {code}".format(
                    url=self.url, code=req.status_code))
        return req
Example #41
 def _get_console(self, job):
     path = os.path.join(
         self.down_path, job["log_hash"], "console.html.gz")
     if os.path.exists(path):
         log.debug("Console is already here: {}".format(path))
         return path
     web = Web(job["log_url"] + "/console.html")
     req = web.get(ignore404=True)
     if req.status_code == 404:
         url = job["log_url"] + "/console.html.gz"
         web = Web(url=url)
         log.debug("Trying to download gzipped console")
         req = web.get()
     if req.status_code != 200:
         log.error("Failed to retrieve console: {}".format(job["log_url"]))
         return None
     else:
         if not os.path.exists(os.path.dirname(path)):
             os.makedirs(os.path.dirname(path))
         with gzip.open(path, "wb") as f:
             f.write(req.content)
     return path
Example #42
def pretty_dump(data):
    """ format data
    """

    if type(data) is list:

        if len(data) == 0:
            # we got an empty array
            data = {}
        else:
            # Netstring server responds with [{data}]
            log.debug("converting [] to {}")
            data = data[0]

    if type(data) is not dict:
        try:
            data = json.loads(data)
        except Exception as e:
            # data is not valid json, convert to json
            data = {'result': data}

    return json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
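Two usage illustrations for pretty_dump above (assuming it is in scope): a dict passes straight through, while a non-JSON string is wrapped under a result key.

print(pretty_dump({"status": "ok"}))       # dumped as-is
print(pretty_dump("plain text response"))  # becomes {"result": "plain text response"}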
Example #43
    def _check_lock(self):
        """ Returns boolean flag on lock file existence """
        cmd = "ls {0}{1}{2}".format(
            self.config['path'],
            self.DEPLOY_DIR,
            self._get_lock_file_name())

        log.debug('{0} :: Executing - "{1}"'.format(__name__, cmd))
        ret = self.ssh_command_target(cmd)

        # Pull the lock file handle from
        try:
            file_handle = ret['stdout'][0].split('/')[-1].strip()
        except (IndexError, ValueError):
            log.debug('{0} :: Could not extract '
                      'the lock file name.'.format(__name__))
            return False

        if file_handle == self._get_lock_file_name():
            return True
        else:
            return False
Example #44
 def get_regular_file(self):
     log.debug("Get regular file {}".format(self.file_link))
     base_name = os.path.basename(self.file_link)
     if base_name.endswith(".gz"):
         base_name = base_name[:-len(".gz")]
     self.file_name = base_name + ".gz"
     self.file_path = os.path.join(self.job_dir, self.file_name)
     if os.path.exists(self.file_path):
         log.debug("File {} is already downloaded".format(self.file_path))
         return self.file_path
     else:
         web = Web(url=self.file_url)
         ignore404 = self.file_link == "/console.html"
         req = web.get(ignore404=ignore404)
         if req.status_code != 200 and self.file_link == "/console.html":
             self.file_url += ".gz"
             web = Web(url=self.file_url)
             log.debug("Trying to download gzipped console")
             req = web.get()
         if req.status_code != 200:
             log.error("Failed to retrieve URL: {}".format(self.file_url))
             return None
         else:
             with gzip.open(self.file_path, "wb") as f:
                 f.write(req.content)
         return self.file_path
Example #45
 def __init__(self, driver):
     self.driver = driver
     class_name = self.__class__.__name__
     log.debug("Wrap {} inside {}".format(driver.current_url, class_name))
Example #46
def analyze(job, down_path):
    def line_match(pat, line):
        if isinstance(pat, re.Pattern):
            if not pat.search(line):
                return False
            elif pat.search(line).groups():
                return pat.search(line).group(1)
            else:
                return True
        if isinstance(pat, str):
            return pat in line

    message = {
        "text": '',
        "tags": set(),
        "msg": set(),
        "reason": True,
        "job": job,
        "periodic": "periodic" in job.name,
        'patterns': set(),
    }
    templ = ("{date}\t"
             "{job_type:38}\t"
             "{delim}\t"
             "{msg:60}\t"
             "{delim}\t"
             "log: {log_url}")

    msg = set()
    console = JobFile(job, path=down_path).get_file()
    if not console:
        message['text'] = 'No console file'
        message['msg'] = {message['text']}
        message['tags'] = ['infra']
        message['reason'] = True
        return message
    files = PATTERNS.keys()
    for file in files:
        jfile = JobFile(job, path=down_path, file_link=file).get_file()
        if not jfile:
            log.error("File {} is not downloaded, "
                      "skipping its patterns".format(file))
            continue
        else:
            try:
                log.debug("Opening file for scan: {}".format(jfile))
                for line in fileinput.input(
                        jfile, openhook=fileinput.hook_compressed):
                    for p in PATTERNS[file]:
                        if (line_match(p["pattern"], line) and
                                p["msg"] not in msg):
                            log.debug("Found pattern {} in file {}:{}".format(
                                repr(p), file, jfile))
                            msg.add(p["msg"].format(
                                line_match(p["pattern"], line)))
                            message['tags'].add(p["tag"])
                            message['patterns'].add(p['id'])

            except Exception as e:
                log.error("Exception when parsing {}: {}".format(
                    jfile, str(e)))
                msg = {"Error when parsing logs. Please investigate"}
                message['reason'] = False
                message['tags'].add("unknown")
    if not msg:
        log.debug("No patterns in job files {}".format(job))
        msg = {"Reason was NOT FOUND. Please investigate"}
        message['reason'] = False
        message['tags'].add("unknown")
    message['msg'] = msg
    message['text'] = templ.format(
        msg=" ".join(sorted(msg)),
        delim="||" if message['reason'] else "XX",
        date=job.datetime,
        job_type=job.name,
        log_url=job.log_url
    )
    return message
Example #47
 def close(self):
     log.debug("Closing SSH connection")
     self.ssh_cl.close()