def select_file(title, *file_types):
    """Run the file selection script in a child process and return its pick.

    The script ``_a_big_red_button.support.select_file`` is started with the
    current interpreter from ``DEPLOYMENT_ROOT``. Its standard output is
    read as a one-letter status, one separator character (skipped, never
    inspected) and a payload:

    * ``S`` -- the payload, stripped of surrounding whitespace, is the
      selected path;
    * ``E`` -- the payload is an error message.

    :param title: passed to the script as its first command-line argument.
    :param file_types: iterables of strings; they are flattened in order and
        appended to the command line after ``title``.
    :return: the selected file as a ``Path``, or ``None`` when the script
        exits with a non-zero code, prints nothing, reports an error or
        prints something that starts with neither ``S`` nor ``E``. Every
        failure is logged on the ``export`` logger.
    """
    # we do the import here because we don't want the initialisation
    # output to scramble with the output
    from _a_big_red_button.support.directory import DEPLOYMENT_ROOT
    from _a_big_red_button.support.log import get_logger

    # prepare logger
    logger = get_logger('export')

    # assemble select arguments and run the selecting script
    command = [
        sys.executable, "-m", "_a_big_red_button.support.select_file", title,
        *[item for file_type in file_types for item in file_type],
    ]
    completed_process = subprocess.run(
        command, cwd=DEPLOYMENT_ROOT, capture_output=True)

    # a non-zero exit code means the script itself failed
    if completed_process.returncode != 0:
        logger.error(f"cannot select file: error code "
                     f"{completed_process.returncode}")
        return None

    # parse what the script printed
    output = completed_process.stdout.decode()
    if not output:
        logger.error("cannot select file: unknown error")
        return None

    status = output[0]
    if status == 'E':
        logger.error(f"cannot select file: {output[2:]}")
        return None
    if status == 'S':
        selected = Path(output[2:].strip())
        logger.info(f"selected file: {selected}")
        return selected

    # The output does not follow the status protocol, so there is no status
    # prefix to strip: log all of it, otherwise its first two characters
    # would be missing from the only diagnostic available.
    logger.error(f"cannot select file: unrecognised output: {output}")
    return None
export scripts, such as listing, creating new, exporting with a selected script, etc. Kevin Ni, [email protected]. """ import os from pathlib import Path import importlib from _a_big_red_button.support.log import get_logger from _a_big_red_button.support.configuration import get_config from _a_big_red_button.crawler.export_helper import WokPersistentSessionExportHelper from _a_big_red_button.crawler.db import WokPersistentSession # prepare the logger logger = get_logger('export') # parse config config = get_config('export') # resolve the path to all the export scripts EXPORT_SCRIPT_DIR: Path = Path(__file__).parent.joinpath('export') class WokPersistentSessionExportScript: def __init__(self, name: str): script_full_path = EXPORT_SCRIPT_DIR.joinpath(f'{name}.py') if not script_full_path.exists(): raise RuntimeError(f'the requested export script does not exist: ' f'{name} => {script_full_path}')
""" This script implements utilities to parse and manage a print page from Web of Science. Kevin Ni, [email protected]. """ from typing import * from lxml import etree from _a_big_red_button.support.configuration import get_config from _a_big_red_button.support.log import get_logger from _a_big_red_button.crawler.article_attribute_parser import * # get logger logger = get_logger('crawler') # get config config = get_config('crawler') def normalize_name_abbr(name: str): return name.strip().replace(',', '').replace('.', '').upper() class WoKCitation: @staticmethod def make_empty(): return WoKCitation('', '', 2100, None, '', None, None) def __init__(self, journal: str,
Implements analyser functions for Social Network Analysis. Kevin Ni, [email protected]. """ import json from pathlib import Path from typing import * import subprocess import sys from _a_big_red_button.support.log import get_logger from _a_big_red_button.support.singleton import Singleton from _a_big_red_button.support.directory import DEPLOYMENT_ROOT # prepare the logger logger = get_logger('analyser') class WokAnalyser(metaclass=Singleton): def __init__(self): self.file: Optional[Path] = None def select_file(self): # show a file selection dialogue completed_process = subprocess.run( [sys.executable, "-m", "_a_big_red_button.support.select_file"], cwd=DEPLOYMENT_ROOT, capture_output=True) if completed_process.returncode != 0: logger.error(f"cannot select file: error code " f"{completed_process.returncode}")
from typing import * import re import csv import requests import time from io import StringIO from lxml import etree from queue import Queue, Empty, Full import threading from _a_big_red_button.support.configuration import get_config from _a_big_red_button.support.log import get_logger from _a_big_red_button.crawler.print_list import WoKPrintList from _a_big_red_button.consolesync import CONSOLE_SYNC_HANDLER # get logger logger = get_logger('crawler', CONSOLE_SYNC_HANDLER, force_add_additional=True) # get config _config = get_config('crawler') class WokSearchResult: def __init__(self, result_url: str, result_count: int, search_id: str, search_term: str, session: requests.Session, headers: Dict[str, str]): self.result_url, self.result_count = result_url, result_count self.search_id, self.search_term = search_id, search_term self.session, self.headers = session, headers # threading primitives self.task_queue = Queue()
from pathlib import Path
from flask import Flask, render_template, request
from _a_big_red_button.crawler.controller import Wok
from _a_big_red_button.crawler.db import WokPersistentStorage, WokPersistentSession
from _a_big_red_button.crawler.db_meta import WokPersistentSessionMeta
from _a_big_red_button.support.select_file import select_file
from _a_big_red_button.crawler.export_script_helper import available_export_scripts, get_export_script
from _a_big_red_button.crawler.export_worker import WokPersistentSessionExportScriptThreadedRunner
from _a_big_red_button.support.response import good, bad
from _a_big_red_button.support.log import get_logger
from _a_big_red_button.support.configuration import get_config
from _a_big_red_button.crawler.db_search import search_in_all_sessions

# prepare logger
logger = get_logger('controller-front')


def poll_search_progress():
    """Report how the search tracked by ``Wok()`` is getting on.

    The checks run in this order:

    * still searching -> ``good(finished=False)``;
    * no finished search either -> ``bad(...)`` asking whether a search
      was started at all;
    * finished without a problem -> ``good(finished=True, result_count=...)``
      carrying ``Wok().search_result_count``;
    * finished with a problem -> ``bad(...)`` quoting
      ``Wok().search_what_went_wrong``, or a generic hint when that value
      is ``None``.
    """
    # a search is still running
    if Wok().is_searching:
        return good(finished=False)

    # neither running nor done: nothing to poll
    if not Wok().search_done:
        return bad("cannot poll search progress: have you searched?")

    # done and nothing went wrong
    if not Wok().search_went_wrong:
        return good(finished=True, result_count=Wok().search_result_count)

    # done, but the search failed: say why when a reason was recorded
    reason = Wok().search_what_went_wrong
    if reason is not None:
        return bad(f"search failed: {reason}")
    return bad("search failed on unknown error, "
               "check your search term and search again")