Example #1
0
    def _convert_files(obj, eng):
        """Convert extracted files dated inside the harvest window.

        Reads the list of paths in ``obj.data['extracted_files']``, keeps
        those whose date (as reported by ``WorldScientific.get_date``) lies
        between ``from_date`` and ``to_date`` inclusive, converts each with
        ``WorldScientific.get_record`` and writes every non-empty result
        into the storage folder under the file's base name.

        Side effects on ``obj``:
          * ``obj.extra_data['args']`` gets the resolved ``to_date`` and
            ``from_date``.
          * ``obj.data['insert']`` is set to the list of written paths.
          * ``obj.data['result_path']`` is set to the storage folder.

        ``target_folder``, ``date_key`` and ``cache`` are taken from the
        enclosing scope, which is not visible here. ``eng`` is unused.
        """
        from invenio_knowledge.api import get_kb_mappings

        # Journal knowledge base flattened to a plain key -> value dict.
        journal_map = {
            entry['key']: entry['value']
            for entry in get_kb_mappings('JOURNALS')
        }
        converter = WorldScientific(journal_map)

        output_dir = get_storage_path(suffix=target_folder)

        params = obj.extra_data['args']

        # Upper bound of the window: explicit argument, otherwise today.
        to_date = params.get("to_date")
        if not to_date:
            to_date = datetime.now().strftime('%Y-%m-%d')

        # Lower bound: explicit argument, then the cached value, and as a
        # last resort one week before now.
        from_date = params.get("from_date")
        if not from_date:
            from_date = cache.get(date_key)
        if not from_date:
            week_ago = datetime.now() - timedelta(days=7)
            from_date = week_ago.strftime('%Y-%m-%d')

        obj.extra_data['args']["to_date"] = to_date
        obj.extra_data['args']["from_date"] = from_date

        written = []
        for source_path in obj.data['extracted_files']:
            file_date = converter.get_date(source_path)
            if not (from_date <= file_date <= to_date):
                continue

            marc = converter.get_record(source_path)
            if not marc:
                continue

            destination = join(output_dir, basename(source_path))
            written.append(destination)
            with open(destination, 'w') as outfile:
                outfile.write(marc)

        summary = "Converted {0} articles between {1} to {2}".format(
            len(written), from_date, to_date
        )
        obj.log.info(summary)

        obj.data['insert'] = written
        obj.data["result_path"] = output_dir

        obj.log.debug("Saved converted files to {0}".format(output_dir))
        obj.log.debug("{0} files to add".format(len(obj.data["insert"])))
    def _convert_files(obj, eng):
        """Convert extracted files dated inside the harvest window.

        Picks the candidate paths from ``obj.data['all_extracted_files']``
        when the ``reharvest`` argument is set, otherwise from
        ``obj.data['newly_extracted_files']``. A file is converted when
        ``WorldScientific.get_date`` returns ``None`` for it or a date
        between ``from_date`` and ``to_date`` inclusive; other files are
        logged as filtered out. Each non-empty conversion result is written
        into the storage folder under the file's base name.

        Date bounds are ``'%Y-%m-%d'`` strings:
          * ``to_date``: the ``to_date`` argument, otherwise today.
          * ``from_date``: the ``from_date`` argument; otherwise
            ``"1900-01-01"`` for a reharvest, or ``weeks_threshold`` weeks
            before now.

        Side effects on ``obj``:
          * ``obj.extra_data['args']`` gets the resolved ``to_date`` and
            ``from_date``.
          * ``obj.data['insert']`` is set to the list of written paths.
          * ``obj.data['result_path']`` is set to the storage folder.

        ``target_folder`` and ``weeks_threshold`` are taken from the
        enclosing scope, which is not visible here. ``eng`` is unused.
        """
        from invenio_knowledge.api import get_kb_mappings
        mappings = dict(
            map(
                lambda item: (item['key'], item['value']),
                get_kb_mappings('JOURNALS')
            )
        )
        ws = WorldScientific(mappings)

        target_folder_full = get_storage_path(suffix=target_folder)

        args = obj.extra_data['args']
        reharvest = args.get("reharvest")

        # By default, we set the to date as today
        to_date = args.get("to_date") or datetime.now().strftime('%Y-%m-%d')

        from_date = args.get("from_date")

        if not from_date:
            if reharvest:
                # Since "beginning" of time when not specified. Kept as a
                # string so it has the same type as to_date and the other
                # from_date paths (it is compared below and stored in
                # extra_data).
                from_date = "1900-01-01"
            else:
                # Dynamic date in the past when not specified and not
                # reharvest. The subtraction must be parenthesised:
                # strftime exists on datetime, not on timedelta.
                from_date = (
                    datetime.now() - timedelta(weeks=weeks_threshold)
                ).strftime('%Y-%m-%d')

        obj.extra_data['args']["to_date"] = to_date
        obj.extra_data['args']["from_date"] = from_date

        insert_files = []
        if reharvest:
            filenames = obj.data['all_extracted_files']
        else:
            filenames = obj.data['newly_extracted_files']

        for filename in filenames:
            # NOTE(review): the comparison assumes get_date returns either
            # None or a '%Y-%m-%d' string - confirm against WorldScientific.
            date = ws.get_date(filename)
            if date is None or (from_date <= date <= to_date):
                marc = ws.get_record(filename)
                if marc:
                    filename = basename(filename)
                    filename = join(target_folder_full, filename)
                    insert_files.append(filename)
                    with open(filename, 'w') as outfile:
                        outfile.write(marc)
            else:
                obj.log.info("Filtered out {0} ({1})".format(filename, date))

        obj.log.info("Converted {0}/{1} articles between {2} to {3}".format(
            len(insert_files),
            len(filenames),
            from_date,
            to_date
        ))

        obj.data['insert'] = insert_files
        obj.data["result_path"] = target_folder_full

        obj.log.debug("Saved converted files to {0}".format(target_folder_full))
        obj.log.debug("{0} files to add".format(
            len(obj.data["insert"]),
        ))