Example #1
 def create_market(self, data: dict):
     self.validations.new_market(data=data)
     self.validations.is_valid_registry(registry=data.get('registro'))
     LOGGER.debug('Creating new market service')
     new_market = Market(**data)
     market = self.database.insert_market(market=new_market)
     return self.parser.object_to_json(market=market)
Example #2
 def update_market(self, data: dict, registry: str):
     self.validations.update_market(data=data)
     self.validations.is_valid_registry(registry=registry)
     LOGGER.debug('Updating market service')
     market = self.database.get_market_by_registry(registry=registry)
     market = self.database.update_market(market=market, data=data)
     return self.parser.object_to_json(market=market)
Example #3
def takeAll(f1,f2,*args,**kwargs):
  import itertools  # needed below to flatten the merged values

  c1,c2 = f1['content'],f2['content']

  # Assert:
  # 1. Content is a list
  # 2. There is only one type of element in each list
  assert type(c1)==type(c2)==list
  assert len(set([type(i) for i in c1]))==len(set([type(i) for i in c2]))==1

  #If the elements aren't dicts, simply return the union
  if not isinstance(c1[0], dict):
    return list( set(c1).union(c2) ) 

  #If the elements are dicts, we need to deconstruct each dict, check whether it is duplicated, and then re-construct
  #We won't go deeper than the k,v pair: i.e., we will ignore if v is a nested structure
  elif isinstance(c1[0], dict):
    result = {}
    for L in [c1,c2]:
      L = L if isinstance(L,list) else [L]
      for D in L:
        for k,v in D.items():
          v = v if isinstance(v,list) else [v]
          if k not in result:
            result[k] = []
          if v not in result[k]:
            result[k].append(v)
    for k,v in result.items():
      #Flatten the values of the dict
      result[k] = list(itertools.chain(*v))
    return {'content':[result], '@origin': '%s; %s' % (f1['@origin'],f2['@origin'])}

  #If elements are neither, we have a problem!
  LOGGER.critical("takeAll merger didn't get normalized data")
  raise TypeError(c1, c2)
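A minimal usage sketch for the non-dict branch, assuming fields shaped like the merger's {'content': ..., '@origin': ...} records (the values here are illustrative, not from the source):

    f1 = {'content': ['a', 'b'], '@origin': 'ADS'}
    f2 = {'content': ['b', 'c'], '@origin': 'ARXIV'}
    # non-dict elements: the result is the set-union of both content lists
    assert sorted(takeAll(f1, f2)) == ['a', 'b', 'c']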
Example #4
    def update_app(os_name: str) -> None:
        """
        Скачивание программы обновления и запуск обновлений
        :param os_name: имя OS
        :return:
        """
        LOGGER.info(f'Клонируем проект {os_name}')

        response = requests.get(REPO_URL_UPDATER)

        with tempfile.TemporaryFile() as file:
            file.write(response.content)
            file.seek(0)  # rewind so the archive can be read back
            with zipfile.ZipFile(file) as fzip:
                fzip.extractall(path)

        if os_name == 'Windows':
            command = os.path.join(path_to_updater, UPDATE_WIN)
            subprocess.Popen(command, cwd=path_to_updater)
            exit_ex()

        if os_name == 'Linux':
            os.system(f'chmod -R 775 {path_to_updater}')

            showwarning(
                'Update', WARNING_MSG['Update_app']['Linux'].format(
                    branch=REPO_BRANCH_UPDATER, file=UPDATE_LINUX))
            exit_ex()
Example #5
    def __init__(self, ignore_existing_token: bool = False):
        self.logger = LOGGER('config_vk_api', 'vk_api')
        get_requests_db = GetRequestsToDB()
        user_data_table_value = get_requests_db.get_records(
            tb_name=get_requests_db.userdata,
            one_record=True,
            select=['access_token'])
        token = user_data_table_value['access_token']
        self.__additional_windows = AdditionalWindows

        if ignore_existing_token is False:
            if (token is None) or (token == DEFAULT_VALUE_FOR_BD):
                token = self.get_token()
        else:
            token = self.get_token()

        if (token is not None) and (token != DEFAULT_VALUE_FOR_BD):
            is_donat = self.check_is_donat(token)
            if is_donat is False:
                token = None

        self.token = token

        if self.token is not None:
            vk_session = vk_api.VkApi(token=self.token)
            self.vk_tool = vk_api.tools.VkTools(vk_session)

            if ignore_existing_token is True:
                showinfo('Authorized', 'You have logged in successfully!')
            self.logger.info('Obtained vk_tool and the token itself')
        else:
            self.logger.error('Failed to obtain vk_tool')
            self.vk_tool = None

        del get_requests_db, user_data_table_value
Example #6
File: run.py Project: P-Programist/Upwork
    def get_race_info_before(self, html) -> list:
        data_list = []
        soup = BS(html, "html.parser")

        main_window = soup.find("div", attrs={"id": "PassBody"})

        if not main_window:
            LOGGER.warning(
                "Could not find PassBody because the race has not started yet"
            )
            return []

        main_table = main_window.find("table", attrs={"id": "race-pass-body"})
        tbodies = main_table.find_all("tbody", class_="rp-table-row")

        local_race_data = {}

        runners = len(tbodies)
        local_race_data["runners"] = runners

        local_race_data.update(self.date_and_time)
        row = list(local_race_data.values())
        data_list.append(row[1:] + row[:1])

        return data_list
Example #7
File: sec.py Project: EricChea/secrss_feed
    def refresh(self, attempts=10):
        """Refreshes the rss feed.

        Refreshes the contents received from the RSS feed and sets 'self.feed'
        to the new feed contents.

        Args:
            attempts (int): number of attempts to refresh contents before
                stopping.

        """

        for _ in range(attempts):

            feed_update = feedparser.parse(self.rss_url)

            # feedparser creates a 'status' attribute if there is a response
            if 'status' in feed_update:
                if feed_update.status == 304:
                    continue
                elif feed_update.status == 200:
                    self.feed = feed_update
                    return
            else:
                LOGGER.error(f"feed_update: {feed_update.bozo_exception}")

        LOGGER.info(f"{attempts} unnsuccessful attempts at refreshing data.")
Example #8
 def get_customer_by_id(self, customer_id: str) -> Customer:
     LOGGER.debug(f'Getting customer {customer_id} in database')
     customer = Customer.objects(id=customer_id).first()  # pylint: disable=no-member
     if not customer:
         LOGGER.debug('No customer found for the given customer_id')
         raise NotFoundException
     return customer
Example #9
File: sec.py Project: EricChea/secrss_feed
def create_entry(data):
    """Creates a key/value store of data for a SEC filing

    The data that comes from the RSS feed does not contain detailed information
    on the owner of the new issuance, or details about the issuer such as their
    stock ticker.  This function creates a first-hierarchy filing entry
    described by filing attributes such as issuer, owner, and the details
    of each transaction.

    Args:
        data (feedparser.FeedParserDict): a key/value store that describes the
            typical features of an RSS feed.  Derived from parsing an RSS feed.

    Returns:
        dict: a key/value store that describes the attributes of an SEC filing.

    """

    form_type = None
    for tag in data.tags:
        if tag.label == 'form type':
            form_type = tag.term

    accession_num = data.summary.split('<b>AccNo:</b> ')[1][:20]

    if form_type is None:
        LOGGER.info(f"No form type detected: {accession_num}.")

    return dict(
        created=datetime.now(),
        accession_num=accession_num,
        updated=data.updated,
        datalink=data.links[0].href,
        issuer=dict().fromkeys(ISSUER_DSCR + ADDRESS_DSCR, None),
        owner=dict().fromkeys(OWNER_DSCR + ADDRESS_DSCR, None),
        nonderivatives=[],
        derivatives=[],
        form_type=form_type,
    )
Example #10
 def get_product_by_id(self, product_id: str) -> Product:
     LOGGER.debug(f'Getting product {product_id} in database')
     product = Product.objects(id=product_id).first()  # pylint: disable=no-member
     if not product:
         LOGGER.debug('No product found for the given product_id')
         raise NotFoundException
     return product
Example #11
    def delete_value(self):
        """
        Удаляет выбранную запись
        :return:
        """
        try:
            values = self.get_choose_value()
            pk = values['pk']

            LOGGER.warning(
                'Request to delete an element in the GetRequestsApi table')
            delete_request_db = DeleteRequestsToDB()
            delete_request_db.delete_record_in_bd(
                tb_name=delete_request_db.get_requests, where=f'pk={pk}')
            delete_request_db.delete_record_in_bd(
                tb_name=delete_request_db.additional_get_requests,
                where=f'pk_attachment={pk}')
        except IndexError as error:
            if str(error) == 'не выбран элемент':  # "no element selected"
                return
            raise
        for row in self.tree_view.get_children():
            self.tree_view.delete(row)

        del self.values
        gc.collect()
        self.__get_records__()
        self.completion_tree_view()
Example #12
    def do(self):
        """
        implements the behavior of rick

        whenever Rick has moved already, his behavior is skipped for the
        current timestep. Otherwise, when rick has no zombies around him,
        he is happy and does nothing. If there are zombies around, rick tries
        to find a candidate to swap position with. The candidate he looks for
        is an enemy of his neighborhood, namely the one with the least amount
        of zombies in his neighborhood. If rick doesn't find an enemy,
        he looks for a friend to swap with.
        """
        if self._skip:
            return

        if len(self.getNeighborNodes(ZombieFactory.getZombieType())) == 0:
            LOGGER("No Zombies nearby. %s feels safe and does nothing" % self,
                   Logger.LEVEL_DETAILS)
            return

        # when there are any zombies, rick tries to swap with an enemy
        candidate = self.__get_candidate_to_sacrifice(EnemyOfRick)

        # if there is no enemy around, he simply sacrifices a friends life
        if candidate is None:
            candidate = self.__get_candidate_to_sacrifice(FriendOfRick)

        # if he finds someone to swap, he swaps
        if candidate is None:
            LOGGER("%s couldn't seems to be surronded by zombies. This is "
                   "gonna be tough" % self, Logger.LEVEL_DETAILS)
        else:
            LOGGER("%s swaps position with %s" % (self, candidate),
                   Logger.LEVEL_DETAILS)
            self.swapPosition(candidate)
Example #13
def word_tokenize_list(sentences_list):
    tokenized_list = list()
    total = len(sentences_list)
    for index, each in enumerate(sentences_list):
        tokenized_list.append(word_tokenize(each))
        LOGGER.info('tokenized : {}/{}'.format(index + 1, total))

    return tokenized_list
Example #14
 def update_customer_by_id(self, customer_id: str, data: dict) -> Customer:
     LOGGER.debug(f'Updating customer {customer_id} in database')
     customer = Customer.objects(id=customer_id).first()  # pylint: disable=no-member
     if not customer:
         raise NotFoundException
     customer.update(**data)  # pylint: disable=no-member
     customer = Customer.objects(id=customer_id).first()
     return customer
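The second query exists because QuerySet.update() writes to MongoDB without refreshing the in-memory document. mongoengine's Document.reload() achieves the same effect without rebuilding the query; a sketch of the alternative, not the author's code:

    customer = Customer.objects(id=customer_id).first()
    if not customer:
        raise NotFoundException
    customer.update(**data)  # updates the database only
    customer.reload()        # pull the fresh state into this instance
    return customer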
Example #15
 def update_product_by_id(self, product_id: str, data: dict) -> Product:
     LOGGER.debug(f'Updating product {product_id} in database')
     product = Product.objects(id=product_id).first()  # pylint: disable=no-member
     if not product:
         raise NotFoundException
     product.update(**data)  # pylint: disable=no-member
     product = Product.objects(id=product_id).first()
     return product
Example #16
 def about():
     about = {
         "version": PINE_EVE_VERSION_STR,
         "eve_version": eve.__version__,
         "flask_version": flask_version
     }
     LOGGER.info(about)
     return jsonify(about)
Example #17
 def send(self, msg, receiver='all'):
     if receiver in self.receivers:
         message = '{}: {}'.format(receiver, msg)
         self.last_message = message
         logger.debug("ZMQPublisher: message={}".format(message))
         self.socket.send_string(message)
     else:
         raise ValueError('receiver is not correct')
Example #18
 def get_products(self, page: int) -> (list, int):
     LOGGER.debug(f'Getting products page {page}')
     products_total = Product.objects().count()
     if page == 1:
         products = Product.objects().limit(LIMIT_PER_PAGE)
     else:
         products = Product.objects().skip(
             (page - 1) * LIMIT_PER_PAGE).limit(LIMIT_PER_PAGE)
     return products, products_total
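With the limit fixed, both branches reduce to one expression; a sketch assuming 1-based pages:

    products = Product.objects().skip((page - 1) * LIMIT_PER_PAGE).limit(LIMIT_PER_PAGE)
    total_pages = -(-products_total // LIMIT_PER_PAGE)  # ceiling division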
Example #19
File: run.py Project: P-Programist/Upwork
    def get_race_venues(self, html, today=False) -> list:
        race_links = []

        if not html:
            LOGGER.info("Cannot scrape the HTML for %s" % self.url)
            return race_links

        soup = BS(html, "html.parser")

        if not today:
            race_results_window = soup.find('section',
                                            attrs={"id": 'archiveFormList'})
            result_card_sections = race_results_window.find_all(
                'div',
                class_='w-results-holder')[1:] if race_results_window else []
            ttl_lnk = 'results-title'
        else:
            race_results_window = soup.find('div', class_='w-cards-results')
            result_card_sections = race_results_window.find_all(
                'section') if race_results_window else []
            ttl_lnk = 'w-racecard-grid-race-inactive'

        for result_card_section in result_card_sections:
            if not today:
                sections = result_card_section.find_all('section')
            else:
                sections = result_card_sections

            for section in sections:
                title_links = section.find_all(
                    'a', class_=ttl_lnk) + section.find_all(
                        'a', class_='w-racecard-grid-race-result')

                if not today:
                    if section.h2:
                        if 'IRE' in section.h2.text:
                            break
                else:
                    if 'IRE' in section.h3.a.text:
                        break

                for title_link in title_links:
                    url = 'https://www.timeform.com' + title_link['href']

                    if not today:
                        race_title_text = title_link.text.upper()
                    else:
                        race_title_text = title_link['title'].upper()

                    cleaned_race_definition_word_list = race_title_text.split()

                    race_type_name = get_race_type_name(
                        cleaned_race_definition_word_list)

                    race_links.append((url, race_type_name.replace('\'', '')))

        return race_links
Example #20
 def __set_difference(self,
                      field_set: set,
                      data: dict,
                      is_update: bool = False):
     difference = field_set.difference(data.keys())
     if difference and not is_update:
         LOGGER.debug('Missing field in data payload')
         raise MissingRequiredFieldsException(fields=difference)
     self.__fields_to_remove(data=data, field_set=field_set)
Example #21
 def cancel(self):
     """
     Обработка нажатия отмены введения токена
     :return:
     """
     self.token = DEFAULT_VALUE_FOR_BD
     messagebox.showwarning('Отмена авторизации',
                            WARNING_MSG['VK_API']['cancel_get_token'])
     LOGGER.warning('Во время выполнения метода get_token, он был отменён')
     self.token_window.destroy()
Example #22
def init_publisher():
    publisher = ZMQPublisher()
    while True:
        try:
            # Let's do something here.... read from com port or ...etc...
            barcode = hashlib.sha256(os.urandom(30)).hexdigest()[:10]  # random 10-char id
            publisher.send(barcode, random.choice(['gui', 'all']))
            time.sleep(0.1)
        except KeyboardInterrupt:
            logger.debug('init_publisher while loop is stopping')
            break
Example #23
 def test_stupid(self):
     publisher = ZMQPublisher()
     receivers = ['gui', 'all']
     for i in range(10):
         barcode = hashlib.sha256(os.urandom(30)).hexdigest()[:10]
         try:
             publisher.send(barcode, random.choice(receivers))
             time.sleep(1)
         except Exception as e:
             logger.debug(e)
             self.fail('something wrong')
Example #24
def init_subscribe(queue):
    subscriber = ZMQSubscriber(queue=queue)
    i = 10
    while i > 0:
        if not queue.empty():
            logger.debug('message through queue={}'.format(queue.get()))
        else:
            logger.debug('queue is empty')
            time.sleep(0.2)
        i -= 1
    time.sleep(5)
    subscriber.stop()
Example #25
 def __init__(self, schema_name):
     '''
     Initialize validation for the named schema
     :param schema_name: key into SCHEMAS
     '''
     self.schema = SCHEMAS[schema_name]
     LOGGER.info("initiated schema is: {0}".format(schema_name))
     self.ignore_keys = ["default", "unique", "data_relation"]
     self.schema = self.delete_keys_from_dict(self.schema, self.ignore_keys)
     self.schema_name = schema_name
     LOGGER.info("validating schema:{0}".format(self.schema))
     self.v = FabDataValidator(self.schema)
Example #26
    def insert_many_values_into_get_requests(self, type_request: str,
                                             count: int, response: List[dict],
                                             time: float,
                                             last_parse: int) -> None:
        """
        Функция вставки данных в таблицу GET запросов к Vk в том случае.
        Создана потому что Windows плохо работает если нужно большое
        количество данных вставить в БД
        :param type_request: тип парсинга
        :param count: количество людей
        :param response: результат выполнения парсинга
        :param time: время парсинга
        :param last_parse: возможен ли дальнейший парсинг по данным
        :return:
        """
        if count <= COUNT_MANY_INSERT:
            # Acceptable number of people for one record
            peoples = json.dumps(response, ensure_ascii=False)
            self.insert_in_table(
                tb_name=self.get_requests,
                data=[type_request, count, peoples, time, last_parse])
        else:
            # Too many people for one record
            self.insert_in_table(  # insert a placeholder into the main table
                tb_name=self.get_requests,
                data=[type_request, count, REQUIRES_DATA, time, last_parse])
            get_request_db = GetRequestsToDB()
            pk = get_request_db.get_records(tb_name=self.get_requests,
                                            select=['pk'],
                                            one_record=True,
                                            order='pk DESC')
            get_request_db.connect_bd.close()
            attachment_pk = int(pk['pk'])
            slice_from = 0
            slice_to = COUNT_MANY_INSERT + 1

            while True:
                peoples = response[slice_from:slice_to]
                peoples = json.dumps(peoples, ensure_ascii=False)[1:-1]
                self.insert_in_table(tb_name=self.additional_get_requests,
                                     data=[attachment_pk, peoples])

                if slice_to == count + 1:
                    break
                slice_from = slice_to
                if slice_to + COUNT_MANY_INSERT + 1 > count + 1:
                    slice_to = count + 1
                else:
                    slice_to += COUNT_MANY_INSERT + 1

        LOGGER.warning('Clearing data')
        del type_request, count, response, time, last_parse, peoples
        gc.collect()
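The slice bookkeeping in the while loop above can be expressed with a stepped range; a behavior-equivalent sketch (chunk size COUNT_MANY_INSERT + 1, as in the original):

    chunk = COUNT_MANY_INSERT + 1
    for start in range(0, len(response), chunk):
        part = json.dumps(response[start:start + chunk], ensure_ascii=False)[1:-1]
        self.insert_in_table(tb_name=self.additional_get_requests,
                             data=[attachment_pk, part])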
Example #27
    def delete_all_records(self, name_table: str) -> None:
        """
        Функция удаления всех записей в таблице
        :param name_table: имя очищаемой таблицы
        :return: None
        """
        LOGGER.warning(f'Начинаю удаление таблицы {name_table}')
        self.remote_control_bd.execute(f'DROP TABLE IF EXISTS {name_table}')
        self.connect_bd.commit()

        MainDB()
        LOGGER.info('Successfully deleted')
Example #28
 def show_train_stats(epoch, iteration, losses, y_true, y_pred):
     # compute mean statistics
     loss = np.mean(losses)
     accuracy = accuracy_score(y_true, y_pred)
     LOGGER.info(
         'Epoch={}, Iter={:,}, Mean Training Loss={:.4f}, Accuracy={:.4f}'
         .format(epoch, iteration, loss, accuracy))
     add_metric_summaries('train', iteration, {
         'cross_entropy': loss,
         'accuracy': accuracy
     })
     LOGGER.info('\n{}'.format(
         classification_report(y_true, y_pred, digits=3)))
Example #29
def get_log_object(fname='log.log', loggername='pipeline'):
    import logging
    import os

    def add_file_handler(fname):
        file_handler = logging.FileHandler(fname)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(fileformatter)
        logger.addHandler(file_handler)

    class MyFormatter(logging.Formatter):
        def format(self, record):
            try:
                msg = record.msg.split(':', 1)
                if len(msg) == 2:
                    record.msg = '[{:<15}]{}'.format(msg[0], msg[1])
            except Exception:
                pass
            return logging.Formatter.format(self, record)

    logger = logging.getLogger(loggername)
    logger.setLevel(logging.DEBUG)

    f = '[%(asctime)s][%(processName)-5s][%(levelname)-21s]%(message)s'
    formatter = MyFormatter(f)
    fileformatter = logging.Formatter(f)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

    # Log level colors
    logging.addLevelName(
        logging.DEBUG,
        "\033[1;34m{}\033[1;0m".format(logging.getLevelName(logging.DEBUG)))
    logging.addLevelName(
        logging.INFO,
        "\033[1;32m{}\033[1;0m".format(logging.getLevelName(logging.INFO)))
    logging.addLevelName(
        logging.WARNING,
        "\033[1;33m{}\033[1;0m".format(logging.getLevelName(logging.WARNING)))
    logging.addLevelName(
        logging.ERROR,
        "\033[1;31m{}\033[1;0m".format(logging.getLevelName(logging.ERROR)))
    logging.addLevelName(
        logging.CRITICAL,
        "\033[1;41m{}\033[1;0m".format(logging.getLevelName(logging.CRITICAL)))

    fpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), fname)
    add_file_handler(fpath)

    return logger
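MyFormatter rewrites console messages of the form 'stage:detail' into an aligned '[stage          ]detail' prefix; a usage sketch under that convention (the file name is illustrative):

    LOGGER = get_log_object('pipeline.log', 'pipeline')
    LOGGER.info('ingest:read 1,204 records')  # console shows [ingest         ]read 1,204 records
    LOGGER.warning('merge:duplicate bibcode')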
Example #30
def run_cmd(self, tool, cmd):
    import subprocess
    from settings import LOGGER as logger

    tool_path = subprocess.check_output(["which", tool]).decode().strip()
    if not tool_path:
        raise Exception('{} not found'.format(tool))
    try:
        args = " ".join(cmd).split(" ")
        pipe = subprocess.Popen(args, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        out, err = pipe.communicate()
        code = pipe.returncode
    except Exception as message:
        logger.debug('{}'.format(message))
        raise

    return out, err, code
Example #31
    def delete_record_in_bd(self, tb_name: str, where: str) -> None:
        """
        Функция удаления записи из таблицы
        :param tb_name: имя таблицы из которой удаляется запись
        :param where: параметры по которым искать нужную запись
        :return:
        """
        LOGGER.warning(
            f'Начинаю удаление записи where={where}, таблица GetRequestsApi'
        )
        self.remote_control_bd.execute(f'DELETE FROM {tb_name} WHERE {where}')
        self.connect_bd.commit()

        LOGGER.info('Successfully deleted')
Example #32
def takeAll(f1,f2,*args,**kwargs):
  c1,c2 = ensureList(f1['content']),ensureList(f2['content'])

  # Assert:
  # 1. Content is a list
  # 2. There is only one type of element in each list
  assert len(set([type(i) for i in c1]))==len(set([type(i) for i in c2]))==1

  #If the elements aren't dicts, simply return the union
  if not isinstance(c1[0], dict):
    res = []
    for c in set(c1).union(c2):
      origin = []
      if c in c1:
        origin.append(f1['@origin'])
      if c in c2:
        origin.append(f2['@origin'])
      res.append({
        'content': c,
        '@origin': '; '.join(origin),
        })
    return res

  #If the elements are dicts, we need to deconstruct each dict, check whether it is duplicated, and then re-construct
  #We won't go deeper than the k,v pair: i.e., we will ignore if v is a nested structure
  elif isinstance(c1[0], dict):
    res = []
    for f in [f1,f2]:
      for c in ensureList(f['content']):
        if c in res:
          continue
        origin = []
        if c in ensureList(f1['content']):
          origin.append(f1['@origin'])
        if c in ensureList(f2['content']):
          origin.append(f2['@origin'])
        res.append({
          'content': c.get('content',c),
          '@origin': '; '.join(list(set(origin))),
        })
    return {'content':res,'@origin': '%s; %s' % (f1['@origin'],f2['@origin'])}

  #If elements are neither, we have a problem!
  LOGGER.critical("takeAll merger didn't get normalized data")
  raise TypeError(c1, c2)
Example #33
def publish(records, max_queue_size=30, url=psettings.RABBITMQ_URL,
            exchange='MergerPipelineExchange', routing_key='FindNewRecordsRoute',
            LOGGER=LOGGER):
  #It's OK that we create/tear down this connection many times within this script; it is not a bottleneck
  #and likely slightly increases the stability of the workflow

  w = RabbitMQWorker()
  w.connect(url)

  #Hold onto the message if publishing it would cause the number of queued messages to exceed max_queue_size
  responses = [w.channel.queue_declare(queue=i,passive=True) for i in ['UpdateRecordsQueue','ReadRecordsQueue']]
  while any([r.method.message_count >= max_queue_size for r in responses]):
    LOGGER.debug(">=%s messages in the relevant queue(s). I will wait 15s while they get consumed." % max_queue_size)
    time.sleep(15)
    responses = [w.channel.queue_declare(queue=i,passive=True) for i in ['UpdateRecordsQueue','ReadRecordsQueue']]
  
  payload = json.dumps(records)
  w.channel.basic_publish(exchange, routing_key, payload)
  LOGGER.debug("Published payload with hash: %s" % hash(payload))
  w.connection.close()
Example #34
        publish(records)
    else:
      s = time.time()
      records = utils.findChangedRecords(records,LOGGER,MONGO)
      LOGGER.info('[%s] Found %s records to be updated in %0.1fs' % (target,len(records),(time.time()-s)))

      if args.load_from_files:
        records,targets = utils.readRecordsFromFiles(records,args.load_from_files,LOGGER)
      else:
        records,targets = utils.readRecords(records,LOGGER)

      s = time.time()
      records = utils.updateRecords(records,targets,LOGGER)
      LOGGER.info('[%s] Updating %s records took %0.1fs' % (target,len(records),(time.time()-s)))

      s = time.time()
      utils.mongoCommit(records,LOGGER,MONGO)
      LOGGER.info('Wrote %s records to mongo in %0.1fs' % (len(records),(time.time()-s)))
      
      LOGGER.debug('--End-- (%0.1fs)' % (time.time()-start))
  return records

if __name__ == '__main__':
  try:
    main()
  except SystemExit:
    pass  # this exception is raised by argparse if -h or wrong args are given; we will ignore it
  except:
    LOGGER.exception('Traceback:')
    raise
Example #35
def main(LOGGER=LOGGER,MONGO=MONGO,*args):
  PROJECT_HOME = os.path.abspath(os.path.dirname(__file__))
  start = time.time()
  LOGGER.debug('--Start--') 
  if args:
    sys.argv.extend(*args)

  parser = argparse.ArgumentParser()

  parser.add_argument(
    '--bibcode-files',
    nargs='*',
    default=CLASSIC_BIBCODES.values(),
    dest='updateTargets',
    help='full paths to bibcode files'
    )

  parser.add_argument(
    '--bibcodes',
    nargs='*',
    default=None,
    dest='targetBibcodes',
    help='Only analyze the specified bibcodes'
    )

  parser.add_argument(
    '--async',
    default=False,
    action='store_true',
    dest='async_mode',  # 'async' is reserved in Python 3.7+, so args.async would not parse
    help='start in async mode'
    )

  parser.add_argument(
    '--load-records-from-files',
    nargs='*',
    default=None,
    dest='load_from_files',
    help='Load XML records from files via pickle instead of ADSExports',
    )

  args = parser.parse_args()
  LOGGER.debug('Received args (%s)' % (args))
  for target in args.updateTargets:
    targetRecords = []
    LOGGER.info('Working on bibcodes in %s' % target)
    
    s = time.time() #Let's eventually use statsd for these timers :)
    with cd(PROJECT_HOME):
      with open(target) as fp:
        records = []
        for line in fp:
          if not line or line.startswith("#"):
            continue
          r = tuple(line.strip().split('\t'))
          if args.targetBibcodes:
            if r[0] in args.targetBibcodes:
              records.append(r)
          else:
            records.append(r)
          if args.async_mode and len(records) >= BIBCODES_PER_JOB:
            #We will miss the last batch of records unless the total is evenly divisible by BIBCODES_PER_JOB
            publish(records)
            records = []
            #TODO: Throttling?

    LOGGER.debug('[%s] Read took %0.1fs' % (target,(time.time()-s)))
    #Publish any leftovers in case the total was not evenly divisible
    if args.async_mode:
      if records:
        publish(records)
    else:
      s = time.time()
      records = utils.findChangedRecords(records,LOGGER,MONGO)
      LOGGER.info('[%s] Found %s records to be updated in %0.1fs' % (target,len(records),(time.time()-s)))

      if args.load_from_files:
        records,targets = utils.readRecordsFromFiles(records,args.load_from_files,LOGGER)
      else:
        records,targets = utils.readRecords(records,LOGGER)

      s = time.time()
      records = utils.updateRecords(records,targets,LOGGER)
      LOGGER.info('[%s] Updating %s records took %0.1fs' % (target,len(records),(time.time()-s)))

      s = time.time()
      utils.mongoCommit(records,LOGGER,MONGO)
      LOGGER.info('Wrote %s records to mongo in %0.1fs' % (len(records),(time.time()-s)))
      
      LOGGER.debug('--End-- (%0.1fs)' % (time.time()-start))