def __init__(
    self,
    session: FormsiteSession = None,
    token: str = None,
    server: str = None,
    directory: str = None,
) -> None:
    """Build a FormsiteFormsList from a session or from raw credentials.

    Pass either an existing ``session`` or all three of ``token``,
    ``server`` and ``directory``; in the latter case a new
    FormsiteSession is created from them.

    Args:
        session (FormsiteSession): Ready-made FormsiteSession object.
        token (str): Formsite API token (used only when ``session`` is None).
        server (str): Formsite server, forwarded to FormsiteSession.
        directory (str): Formsite directory, forwarded to FormsiteSession.

    Note:
        When ``session`` is None and any of the three credentials is
        missing, ``self.session`` stays None and building ``forms_url``
        below raises AttributeError.
    """
    super().__init__()
    self._data: pd.DataFrame = pd.DataFrame()

    # Only build a session ourselves when none was supplied and the
    # credentials are complete.
    credentials = (token, server, directory)
    has_all_credentials = all(part is not None for part in credentials)
    active_session = session
    if active_session is None and has_all_credentials:
        active_session = FormsiteSession(token, server, directory)
    self.session: FormsiteSession = active_session

    self.logger: FormsiteLogger = FormsiteLogger()
    self.forms_url = f"{self.session.url_base}/forms"
def __init__(
    self,
    cache_dir: str,
    serialization_format: str = "feather",
) -> None:
    """Create a FormCache rooted at ``cache_dir``.

    The directory is resolved to an absolute POSIX-style path and created
    if it does not exist yet.

    Args:
        cache_dir (str): Path where you want to store the form.
        serialization_format (str, optional): Defaults to "feather".

    Serialization formats:
        `feather`
        `hdf`
        `parquet`
        `pickle`
        `json`
        `csv`

    Raises:
        AssertionError: If ``serialization_format`` is not one of the
            formats listed above.
    """
    supported_formats = ("feather", "hdf", "parquet", "pickle", "json", "csv")
    assert (
        serialization_format in supported_formats
    ), f"Invalid serialization format: {serialization_format}"

    self.logger: FormsiteLogger = FormsiteLogger()
    resolved_dir = Path(cache_dir).resolve()
    self.cache_dir = resolved_dir.as_posix()
    self.s_format = serialization_format
    # Make sure the cache location exists before anything is written to it.
    os.makedirs(self.cache_dir, exist_ok=True)
def __init__(
    self,
    url_path_list: List[tuple[str, str]],
    num_workers: int,
    callback: Optional[Callable] = None,
) -> None:
    """Initialise the shared bookkeeping for a batch of downloads.

    Args:
        url_path_list (List[tuple[str, str]]): Items to download;
            presumably (url, path) pairs -- confirm against the caller.
        num_workers (int): Number of workers (stored as-is).
        callback (Optional[Callable], optional): Stored as-is for later
            use by whoever owns this state. Defaults to None.
    """
    self.url_path_list = url_path_list
    self.num_workers = num_workers
    self.callback = callback
    self.logger: FormsiteLogger = FormsiteLogger()
    # ----
    # Counters: everything starts out enqueued, nothing started or finished.
    self.total: int = len(self.url_path_list)
    self.enqueued: int = len(self.url_path_list)
    self.in_progress: int = 0
    self.success: int = 0
    self.failed: int = 0
    # Per-outcome URL collections, all empty at construction time.
    self.failed_urls: set = set()
    self.success_urls: set = set()
    self.complete_urls: list = []
def __init__(self, token: str, server: str, directory: str):
    """Create a FormSite session object.

    Builds the API base URL and prepares a ``requests`` session whose
    default headers carry the bearer token and request JSON responses.

    Args:
        token (str): Formsite API token, sent as ``Authorization: bearer``.
        server (str): Formsite server prefix; it is interpolated as
            ``https://{server}.formsite.com``.
        directory (str): Formsite user directory, appended after
            ``/api/v2/`` in the base URL.
    """
    self.server = server
    self.directory = directory
    self.url_base: str = f"https://{server}.formsite.com/api/v2/{directory}"

    default_headers = {
        "Authorization": f"bearer {token}",
        "Accept": "application/json",
    }
    self._session = requests.session()
    self._session.headers.update(default_headers)

    self.logger: FormsiteLogger = FormsiteLogger()
    self.logger.debug(
        f"Formsite Session: Initilazied object for url '{self.url_base}'")
def __init__(
    self,
    download_folder: str,
    queue: asyncio.Queue,
    semaphore: asyncio.Semaphore,
    session: ClientSession,
    internal_state: DownloadWorkerState,
    timeout: int = 160,
    max_attempts: int = 3,
) -> None:
    """Store the collaborators and settings used by a download worker.

    Args:
        download_folder (str): Folder the worker downloads into.
        queue (asyncio.Queue): Queue the worker is given to operate on.
        semaphore (asyncio.Semaphore): Semaphore shared with the worker.
        session (ClientSession): HTTP client session.
        internal_state (DownloadWorkerState): Shared state object; its
            ``callback`` attribute is copied onto this worker.
        timeout (int, optional): Passed as ``total`` to ClientTimeout.
            Defaults to 160.
        max_attempts (int, optional): Stored as-is; presumably the
            per-download attempt limit. Defaults to 3.
    """
    self.logger: FormsiteLogger = FormsiteLogger()
    # ---- collaborators
    self.queue = queue
    self.semaphore = semaphore
    self.session = session
    self.internal_state = internal_state
    self.callback = internal_state.callback
    # ---- settings
    self.download_folder = download_folder
    self.max_attempts = max_attempts
    self.timeout = timeout
    self.client_timeout = ClientTimeout(total=timeout)
def __init__(
    self,
    form_id: str,
    session: FormsiteSession,
    params: FormsiteParameters,
):
    """Set up a FormFetcher for one form.

    Args:
        form_id (str): Formsite Form ID, used to build the request URLs.
        session (FormsiteSession): Session object; its ``url_base`` is the
            prefix of both endpoint URLs.
        params (FormsiteParameters): Source of the results and items
            parameter dictionaries.
    """
    self.logger: FormsiteLogger = FormsiteLogger()

    self.form_id = form_id
    self.session = session
    self.params = params

    # Parameter dictionaries are computed once, up front.
    self.results_params = params.results_params_dict()
    self.items_params = params.items_params_dict()

    # Endpoints for this form.
    self.results_url = f"{session.url_base}/forms/{self.form_id}/results"
    self.items_url = f"{session.url_base}/forms/{self.form_id}/items"

    # Paging starts at page 1 of 1.
    self.total_pages = 1
    self.cur_page = 1
def __init__(
    self,
    download_dir: str,
    sorted_urls: List[tuple[str, str]],
    workers: int = 5,
    timeout: int = 160,
    max_attempts: int = 1,
    callback: Optional[Callable] = None,
) -> None:
    """AsyncFormDownloader constructor

    Args:
        download_dir (str): Directory downloads are associated with.
        sorted_urls (List[tuple[str, str]]): List of (url, path)
        workers (int, optional): Number of concurrent downloads. Defaults to 5.
        timeout (int, optional): Download timeout. Defaults to 160.
        max_attempts (int, optional): Max number of attempts. Defaults to 1.
        callback (Optional[Callable], optional): Callback called each time a download is complete. Defaults to None.

    Callback function signature:
        (url: str, path: str, total_files: int) -> None
    """
    # ----
    self.download_dir = download_dir
    self.url_path_list = sorted_urls
    self.workers = workers
    self.timeout = timeout
    self.max_attempts = max_attempts
    self.logger: FormsiteLogger = FormsiteLogger()
    # ----
    # Semaphore is sized to the worker count; the queue is unbounded.
    self.semaphore = asyncio.Semaphore(self.workers)
    self.dl_queue = asyncio.Queue()
    # The callback is not kept on this object; it is handed to the shared state.
    self.internal_state = DownloadWorkerState(
        self.url_path_list,
        self.workers,
        callback=callback,
    )
def __init__(self) -> None:
    """Start with no data and precompile the children-item pattern."""
    self.logger: FormsiteLogger = FormsiteLogger()
    # Lazily matches three digit groups joined by hyphens.
    self.children_item_re = re.compile(r"(\d+?-\d+?-\d+?)")
    self.data = []
def __init__(self) -> None:
    """Create an empty FormsiteFormData.

    Items and the items flag start unset (None); the data frame starts empty.
    """
    self.logger: FormsiteLogger = FormsiteLogger()
    self._data = pd.DataFrame()
    self._items = None
    self._uses_items = None
def main():
    """The main program (CLI)

    Flow, driven by the parsed command-line arguments:

    1. With ``--verbose``, switch the logger to DEBUG and echo to stdout.
    2. If ``list_forms`` was given, fetch the forms list and either print
       it as a table (falsy value) or write it to CSV at the given path,
       then exit with status 0.
    3. Otherwise fetch the requested form and run whichever of the
       output / latest-id / extract / download actions were requested.

    The module-level ``FETCH_PBAR`` and ``DOWNLOAD_PBAR`` progress bars are
    (re)assigned here unless progress bars are disabled.
    """
    global FETCH_PBAR
    global DOWNLOAD_PBAR
    log = FormsiteLogger()
    args = get_args()

    # Initialize logging
    if args.verbose:
        log.level = logging.DEBUG
        # sys.stdout is a stream, so it needs a StreamHandler; FileHandler
        # expects a filename and raises TypeError when given a stream.
        stdout_handler = logging.StreamHandler(sys.stdout)
        log.addHandler(stdout_handler)

    # Initialize session
    with FormsiteSession(args.token, args.server, args.directory) as session:
        # ---- forms listing mode
        if args.list_forms is not None:
            forms_list = FormsiteFormsList.from_session(session)
            forms_list.fetch()
            # ----
            if not args.list_forms:
                # Size-like columns are shown largest first; everything
                # else is sorted ascending.
                largest_first = args.sort_list_by in ("results_count", "files_size")
                df = forms_list.data.sort_values(
                    by=args.sort_list_by,
                    ascending=not largest_first,
                )
                # ----
                df = df[[
                    "name", "form_id", "state", "results_count",
                    "files_size_human"
                ]]
                df.columns = [
                    "name", "form_id", "state", "results count", "files size"
                ]
                pd.set_option("display.max_rows", None)
                pd.set_option("display.max_columns", None)
                pd.set_option("display.width", None)
                pd.set_option("display.max_colwidth", 42)
                print(df.reset_index(drop=True))
            else:
                path = Path(args.list_forms).resolve().as_posix()
                forms_list.to_csv(path)
            sys.exit(0)

        # ---- single form mode
        form = FormsiteForm.from_session(args.form, session)
        params = FormsiteParameters(
            last=args.last,
            after_id=args.afterref,
            before_id=args.beforeref,
            after_date=args.afterdate,
            before_date=args.beforedate,
            resultslabels=args.resultslabels,
            resultsview=args.resultsview,
            timezone=args.timezone,
            sort=args.sort,
        )
        if not args.disable_progressbars:
            FETCH_PBAR = tqdm(desc=f"Exporting {args.form}")
        form.fetch(
            use_items=args.use_items,
            params=params,
            fetch_callback=fetch_pbar_callback,
        )
        if not args.disable_progressbars:
            FETCH_PBAR.close()

        # ---- requested actions
        if args.output is not None:
            save_output(args, form)
        if args.latest_id is not None:
            save_latest_id(args, form)
        if args.extract is not None:
            save_extract(args, form)
        if args.download is not None:
            loop = asyncio.get_event_loop()
            if not args.disable_progressbars:
                DOWNLOAD_PBAR = tqdm(desc=f"Downloading from {args.form}")
            save_download(args, form, loop)
            if not args.disable_progressbars:
                DOWNLOAD_PBAR.close()