def acta_cpc(source_file, source_folder):
    """
    Harvests Acta Physica Polonica B or Chinese Physics C packages.

    Passed files should contain pushed metadata in XML format. Exactly one of the
    source_file and source_folder parameters should be present.

    If a folder is passed, all files within the folder will be parsed and processed.
    The files are always processed in alphabetical order, so if an article is
    available in multiple files, make sure that the alphabetical and chronological
    orders are equivalent. Ignoring this can result in keeping an older version
    of an article.
    """
    entries = get_packages_for_file_or_folder(source_file, source_folder)
    if not entries:
        log('No entries, abort.', logging.ERROR)
        return

    # Harvest all packages found in the source folder. Sorting the paths results
    # in chronological order, because each filename contains the date and time
    # of delivery.
    entries_count = len(entries)
    log('Processing packages...', entry_count=entries_count)
    for i, path in enumerate(sorted(entries)):
        if not isfile(path):
            log('Path not a file. Skipping.', path=path)
            continue

        log('processing package', path=path, current_index=i, entry_count=entries_count)
        with open(path, 'rt') as f:
            file_data = f.read()
            obj = parse_received_package(file_data, path)
            create_from_json({'records': [obj]}, apply_async=False)
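# A minimal usage sketch for the harvester above, assuming it is invoked directly
# (in practice it is likely wired up as a CLI/management command). The folder path
# is a hypothetical example; exactly one of source_file / source_folder must be given.
acta_cpc(source_file=None, source_folder='/data/harvest/acta_cpc/')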
def test_delete_halted_workflows():
    record = {'dois': [{'value': 'test/doi'}]}

    # create_from_json returns a list of workflow ids
    workflow_id = create_from_json({'records': [record]}, apply_async=False)[0]
    workflow = Workflow.query.get(workflow_id)
    assert workflow.status == WorkflowStatus.HALTED

    workflow_id2 = create_from_json({'records': [record]}, apply_async=False)[0]
    workflow2 = Workflow.query.get(workflow_id2)
    assert workflow_id != workflow_id2
    assert workflow2.status == WorkflowStatus.HALTED

    # the previously halted workflow for the same record should have been deleted
    assert Workflow.query.get(workflow_id) is None
def run_workflow(input_json_filename):
    """Load a hepcrawl response from input_json_filename and run the article_upload workflow on it."""
    file_path = path.join(get_response_dir(), 'hepcrawl', input_json_filename)
    with open(file_path, 'rt') as f:
        json_data = json.loads(f.read())

    workflow_id = create_from_json({'records': [json_data]}, apply_async=False)[0]
    return Workflow.query.get(workflow_id)
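# Example usage of the helper above, assuming a stored hepcrawl response fixture;
# the filename 'example_response.json' is a hypothetical placeholder.
workflow = run_workflow('example_response.json')
assert workflow.status in (WorkflowStatus.COMPLETED, WorkflowStatus.HALTED)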
def run_workflow_with_data(input_json_data, mock_address):
    """Runs article_upload workflow with input_json_data. Returns the Workflow object."""
    with patch('scoap3.modules.workflows.workflows.articles_upload.__halt_and_notify', mock_halt):
        mock_address.register_uri('GET', '/schemas/hep.json', content=read_hep_schema())
        mock_address.register_uri('GET', '/schemas/elements/titles.json', content=read_titles_schema())
        mock_address.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer).*'),
            real_http=True,
        )
        workflow_id = create_from_json({'records': [input_json_data]}, apply_async=False)[0]
        return Workflow.query.get(workflow_id)
def run_article_upload_with_data(input_json_data, mock_address):
    """Runs article_upload workflow with input_json_data. Returns the Workflow object."""
    with patch(
            'scoap3.modules.workflows.workflows.articles_upload.__halt_and_notify',
            mock_halt):
        mock_address.register_uri('GET', '/schemas/hep.json', content=read_hep_schema())
        mock_address.register_uri('GET', '/schemas/elements/titles.json', content=read_titles_schema())
        mock_address.register_uri(
            requests_mock.ANY,
            re.compile('.*(indexer|localhost).*'),
            real_http=True,
        )
        workflow_id = create_from_json({'records': [input_json_data]}, apply_async=False)[0]
        return Workflow.query.get(workflow_id)
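# Sketch of how the two helpers above are typically driven from a test, assuming
# the requests_mock pytest fixture supplies the mock_address argument. The record
# payload is a minimal hypothetical example, not a real publisher package.
def test_article_upload_with_minimal_record(requests_mock):
    record = {'dois': [{'value': '10.1234/example.doi'}]}
    workflow = run_article_upload_with_data(record, requests_mock)
    assert workflow is not None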
def handle_upload_request(apply_async=True):
    """Handle articles that are pushed from publishers."""
    remote_addr = request.environ['REMOTE_ADDR']
    logger.info('Robotupload request received. remote_addr=%s headers=%s args=%s data=%s' % (
        remote_addr, request.headers, request.args, request.data[:100]))
    validate_request(remote_addr)

    package_name = 'robotupload_%s_%s' % (datetime.now().isoformat(), remote_addr)
    logger.info('Package delivered. package_name=%s' % package_name)

    # save delivered package
    file_data = request.data
    save_package(package_name, file_data)

    obj = parse_received_package(file_data, package_name)
    journal_title = obj['publication_info'][0]['journal_title']
    check_permission_for_journal(journal_title, remote_addr, package_name)

    return create_from_json({'records': [obj]}, apply_async=apply_async)
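# Hedged client-side sketch for the raw-body variant above: the publisher POSTs
# the metadata package directly as the request body. The endpoint URL and file
# name are illustrative assumptions, not the project's actual values.
import requests

with open('package.xml', 'rb') as f:
    response = requests.post('https://example.org/batchuploader/robotupload', data=f.read())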
def handle_upload_request(apply_async=True):
    """Handle articles that are pushed from publishers."""
    remote_addr = request.environ['REMOTE_ADDR']
    validate_request(remote_addr)

    uploaded_file = request.files['file']
    filename = secure_filename(uploaded_file.filename)
    file_data = uploaded_file.read()
    logger.info('Package delivered with name %s from %s.' % (filename, remote_addr))

    # save delivered package
    delivery_time = datetime.now().isoformat()
    package_name = '_'.join((delivery_time, filename, remote_addr))
    save_package(package_name, file_data)

    obj = parse_received_package(file_data, package_name)
    journal_title = obj['publication_info'][0]['journal_title']
    check_permission_for_journal(journal_title, remote_addr, package_name)

    return create_from_json({'records': [obj]}, apply_async=apply_async)
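# Matching client-side sketch for the multipart variant above: the package is sent
# as a form upload under the 'file' field, which is what request.files['file']
# expects. URL and filename are again illustrative assumptions.
import requests

with open('package.tar.gz', 'rb') as f:
    response = requests.post('https://example.org/upload', files={'file': ('package.tar.gz', f)})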
def test_article_upload():
    with open('scoap3/data/scoap3demodata_short.json') as source:
        records = json.loads(source.read())
        create_from_json(records, apply_async=False)

    assert Compliance.query.count() == 1
def loadrecords(source):
    """Load records from a migration dump."""
    records = json.loads(source.read())
    create_from_json(records)
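# Minimal sketch of calling the loader, assuming it receives an open file-like
# object (e.g. as provided by a click.File CLI argument). The dump path is a
# hypothetical example.
with open('scoap3_migration_dump.json') as source:
    loadrecords(source)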