Exemple #1
0
    def __init__(
        self,
        session: FormsiteSession = None,
        token: str = None,
        server: str = None,
        directory: str = None,
    ) -> None:
        """FormsiteFormsList master constructor

        Pass either an existing session, or all three of `token`, `server`
        and `directory` so that a new session can be created from them.
        When `session` is given, the other three arguments are ignored.

        Args:
            session (FormsiteSession): FormsiteSession object

            OR

            token (str): Formsite API Token
            server (str): Formsite Server (fsX.formsite.com)
            directory (str): Formsite Directory

        Raises:
            ValueError: If no session is given and `token`, `server` or
                `directory` is missing.
        """
        super().__init__()
        self._data: pd.DataFrame = pd.DataFrame()
        if session is None:
            # Fail early with a clear message; otherwise the `url_base` lookup
            # below would fail on `None` with an unhelpful AttributeError.
            if token is None or server is None or directory is None:
                raise ValueError(
                    "Provide either `session` or all of `token`, `server` "
                    "and `directory`")
            session = FormsiteSession(token, server, directory)
        self.session: FormsiteSession = session
        self.logger: FormsiteLogger = FormsiteLogger()
        self.forms_url = f"{self.session.url_base}/forms"
Exemple #2
0
    def __init__(
        self,
        cache_dir: str,
        serialization_format: str = "feather",
    ) -> None:
        """FormCache constructor

        Resolves `cache_dir` to an absolute POSIX-style path and creates the
        directory if it does not exist yet.

        Args:
            cache_dir (str): Path where you want to store the form.
            serialization_format (str, optional): Defaults to "feather".

        Serialization formats:
             `feather`
             `hdf`
             `parquet`
             `pickle`
             `json`
             `csv`

        Raises:
            AssertionError: If `serialization_format` is not one of the
                formats listed above.
        """
        _VALID_FORMATS = ["feather", "hdf", "parquet", "pickle", "json", "csv"]

        # Raised explicitly rather than via `assert` so the check still runs
        # under `python -O`; the exception type is kept for existing callers.
        if serialization_format not in _VALID_FORMATS:
            raise AssertionError(
                f"Invalid serialization format: {serialization_format}")
        self.logger: FormsiteLogger = FormsiteLogger()
        self.cache_dir = Path(cache_dir).resolve().as_posix()
        self.s_format = serialization_format
        os.makedirs(self.cache_dir, exist_ok=True)
 def __init__(
     self,
     url_path_list: "List[Tuple[str, str]]",
     num_workers: int,
     callback: Optional[Callable] = None,
 ) -> None:
     """Set up the bookkeeping counters for a batch of downloads.

     Args:
         url_path_list: Items to download; presumably (url, path) pairs,
             to be confirmed against the caller.
         num_workers (int): Number of workers; stored as given.
         callback (Optional[Callable], optional): Stored as given.
             Defaults to None.
     """
     # The annotation above is quoted because the previous `List[str, str]`
     # is not a valid parametrisation of `List` and fails when evaluated.
     self.url_path_list = url_path_list
     self.num_workers = num_workers
     self.callback = callback
     self.logger: FormsiteLogger = FormsiteLogger()
     # ----
     # Everything starts out enqueued; nothing is in progress or finished.
     item_count = len(self.url_path_list)
     self.total: int = item_count
     self.enqueued: int = item_count
     self.in_progress: int = 0
     self.success: int = 0
     self.failed: int = 0
     self.failed_urls: set = set()
     self.success_urls: set = set()
     self.complete_urls: list = []
Exemple #4
0
    def __init__(self, token: str, server: str, directory: str):
        """Build a FormSite session for one server and user directory.

        Args:
            token (str): Formsite API Token, sent as a bearer
                `Authorization` header
            server (str): Formsite Server; inserted in the base URL as the
                subdomain of `formsite.com`
            directory (str): Formsite User directory; last segment of the
                base URL
        """
        self.server = server
        self.directory = directory
        self.url_base: str = f"https://{server}.formsite.com/api/v2/{directory}"

        # Default headers installed on the underlying requests session.
        default_headers = {
            "Authorization": f"bearer {token}",
            "Accept": "application/json",
        }
        self._session = requests.session()
        self._session.headers.update(default_headers)

        self.logger: FormsiteLogger = FormsiteLogger()
        self.logger.debug(
            f"Formsite Session: Initilazied object for url '{self.url_base}'")
 def __init__(
     self,
     download_folder: str,
     queue: asyncio.Queue,
     semaphore: asyncio.Semaphore,
     session: ClientSession,
     internal_state: DownloadWorkerState,
     timeout: int = 160,
     max_attempts: int = 3,
 ) -> None:
     """Store the resources and settings a download worker operates on.

     Args:
         download_folder (str): Stored as given.
         queue (asyncio.Queue): Stored as given.
         semaphore (asyncio.Semaphore): Stored as given.
         session (ClientSession): Stored as given.
         internal_state (DownloadWorkerState): State object; its
             `callback` attribute is copied onto this worker.
         timeout (int, optional): Passed as `total` to `ClientTimeout`.
             Defaults to 160.
         max_attempts (int, optional): Stored as given. Defaults to 3.
     """
     # Shared objects handed in by the caller.
     self.queue = queue
     self.semaphore = semaphore
     self.session = session
     self.internal_state = internal_state
     # Per-worker settings.
     self.download_folder = download_folder
     self.max_attempts = max_attempts
     self.timeout = timeout
     self.logger: FormsiteLogger = FormsiteLogger()
     # Values derived from the arguments above.
     self.client_timeout = ClientTimeout(total=timeout)
     self.callback = internal_state.callback
Exemple #6
0
    def __init__(
        self,
        form_id: str,
        session: FormsiteSession,
        params: FormsiteParameters,
    ):
        """FormFetcher constructor

        Args:
            form_id (str): Formsite Form ID
            session (FormsiteSession): FormsiteSession object; its
                `url_base` is the prefix of both endpoint URLs
            params (FormsiteParameters): Source of the results and items
                parameter dicts
        """
        self.form_id = form_id
        self.session = session
        self.params = params

        # Parameter dicts are computed once, at construction time.
        self.results_params = params.results_params_dict()
        self.items_params = params.items_params_dict()

        # Both endpoints live under the same per-form path.
        form_endpoint = f"{session.url_base}/forms/{self.form_id}"
        self.results_url = f"{form_endpoint}/results"
        self.items_url = f"{form_endpoint}/items"

        # Paging starts at page 1 of 1.
        self.total_pages = self.cur_page = 1
        self.logger: FormsiteLogger = FormsiteLogger()
    def __init__(
        self,
        download_dir: str,
        sorted_urls: "List[Tuple[str, str]]",
        workers: int = 5,
        timeout: int = 160,
        max_attempts: int = 1,
        callback: Optional[Callable] = None,
    ) -> None:
        """AsyncFormDownloader constructor

        Args:
            download_dir (str)
            sorted_urls (List[Tuple[str, str]]): List of (url, path)
            workers (int, optional): Number of concurrent downloads. Defaults to 5.
            timeout (int, optional): Download timeout. Defaults to 160.
            max_attempts (int, optional): Max number of attempts. Defaults to 1.
            callback (Optional[Callable], optional): Callback called each time a download is complete. Defaults to None.

        Callback function signature:
            (url: str, path: str, total_files: int) -> None
        """
        # The `sorted_urls` annotation is quoted because the previous
        # `List[str, str]` is not a valid parametrisation of `List` and
        # fails when evaluated.
        # ----
        self.download_dir = download_dir
        self.url_path_list = sorted_urls
        self.workers = workers
        self.timeout = timeout
        self.max_attempts = max_attempts
        self.logger: FormsiteLogger = FormsiteLogger()
        # ----
        # The semaphore is sized to the configured number of workers.
        self.semaphore = asyncio.Semaphore(self.workers)
        self.dl_queue = asyncio.Queue()
        # The callback is stored on the state object, not on this instance.
        self.internal_state = DownloadWorkerState(
            self.url_path_list,
            self.workers,
            callback=callback,
        )
Exemple #8
0
 def __init__(self) -> None:
     """Start with no data and a precompiled item-id pattern."""
     self.logger: FormsiteLogger = FormsiteLogger()
     # Captures three hyphen-separated digit runs, each matched lazily.
     item_id_pattern = r"(\d+?-\d+?-\d+?)"
     self.children_item_re = re.compile(item_id_pattern)
     self.data = []
Exemple #9
0
 def __init__(self) -> None:
     """FormsiteFormData constructor

     Starts with an empty DataFrame and with the items-related
     attributes unset (None).
     """
     self.logger: FormsiteLogger = FormsiteLogger()
     self._data = pd.DataFrame()
     # Both stay None until something else assigns them.
     self._uses_items = self._items = None
Exemple #10
0
def main():
    """The main program (CLI)

    Parses the command-line arguments, opens a Formsite session and then
    either lists the available forms (printing them or writing a CSV, then
    exiting with status 0) or fetches a single form and runs whichever of
    the output / latest-id / extract / download actions were requested.
    """
    global FETCH_PBAR
    global DOWNLOAD_PBAR
    LOG = FormsiteLogger()
    args = get_args()

    # Initialize logging
    if args.verbose:
        LOG.level = logging.DEBUG
        # StreamHandler, not FileHandler: FileHandler expects a file name
        # and raises TypeError when handed the `sys.stdout` stream.
        stdout_handler = logging.StreamHandler(sys.stdout)
        LOG.addHandler(stdout_handler)

    # Initialize session
    with FormsiteSession(args.token, args.server, args.directory) as session:
        # ----
        if args.list_forms is not None:
            forms_list = FormsiteFormsList.from_session(session)
            forms_list.fetch()
            # ----
            if not args.list_forms:
                # Falsy but not None: print the list instead of saving it.
                # Count and size columns sort largest-first, others ascending.
                largest_first = args.sort_list_by in [
                    "results_count", "files_size"
                ]
                df = forms_list.data.sort_values(
                    by=args.sort_list_by,
                    ascending=not largest_first,
                )
                # ----
                df = df[[
                    "name", "form_id", "state", "results_count",
                    "files_size_human"
                ]]
                df.columns = [
                    "name", "form_id", "state", "results count", "files size"
                ]
                pd.set_option("display.max_rows", None)
                pd.set_option("display.max_columns", None)
                pd.set_option("display.width", None)
                pd.set_option("display.max_colwidth", 42)
                print(df.reset_index(drop=True))
            else:
                path = Path(args.list_forms).resolve().as_posix()
                forms_list.to_csv(path)
            # Listing forms is a terminal action; nothing else runs after it.
            sys.exit(0)
        # ----

        form = FormsiteForm.from_session(args.form, session)
        params = FormsiteParameters(
            last=args.last,
            after_id=args.afterref,
            before_id=args.beforeref,
            after_date=args.afterdate,
            before_date=args.beforedate,
            resultslabels=args.resultslabels,
            resultsview=args.resultsview,
            timezone=args.timezone,
            sort=args.sort,
        )
        if not args.disable_progressbars:
            FETCH_PBAR = tqdm(desc=f"Exporting {args.form}")
        form.fetch(
            use_items=args.use_items,
            params=params,
            fetch_callback=fetch_pbar_callback,
        )
        if not args.disable_progressbars:
            FETCH_PBAR.close()

        # ----

        if args.output is not None:
            save_output(args, form)

        if args.latest_id is not None:
            save_latest_id(args, form)

        if args.extract is not None:
            save_extract(args, form)

        if args.download is not None:
            loop = asyncio.get_event_loop()
            if not args.disable_progressbars:
                DOWNLOAD_PBAR = tqdm(desc=f"Downloading from {args.form}")
            save_download(args, form, loop)
            if not args.disable_progressbars:
                DOWNLOAD_PBAR.close()