def _scan_profile(
    url: str,
    org: Optional[str],
    fid: Optional[str],
    useragent: Optional[str],
    gen_newfileuid: Optional[bool],
    max_workers: Optional[int] = None,
    timeout: Optional[float] = None,
) -> ScanResults:
    """
    Find the OFX version/prettyprint/unclosedelements permutations that
    successfully download the OFX profile from a server.

    Returns a 3-tuple of (OFXv1 results, OFXv2 results, signoninfo), each a
    dict.  The OFX results provide ``ofxget`` configs that will work to make
    a basic OFX connection; signoninfo reports further information that may
    be helpful to authenticate successfully.
    """
    logger.info(
        f"Scanning url={url} org={org} fid={fid} "
        f"max_workers={max_workers} timeout={timeout}"
    )

    ofx_client = OFXClient(url, org=org, fid=fid, useragent=useragent)
    futures = _queue_scans(ofx_client, gen_newfileuid, max_workers, timeout)

    # What we mainly keep is the request metadata (OFX version and format)
    # that _queue_scans() associated with each Future, for every Future that
    # turned out to be a successful OFX connection.
    success_params: FormatMap = defaultdict(list)

    # Where possible we also keep data parsed out of the SIGNONINFO in the
    # PROFRS.  SIGNONINFO is assumed identical across successful responses,
    # so _read_scan_response() is told to stop parsing it once we have one.
    signoninfo: SignoninfoReport = {}

    for done in concurrent.futures.as_completed(futures):
        version, fmt = futures[done]
        want_signoninfo = not signoninfo
        valid, parsed_signoninfo = _read_scan_response(done, want_signoninfo)

        if valid:
            if not signoninfo and parsed_signoninfo:
                signoninfo = parsed_signoninfo
            logger.debug(
                f"OFX connection success, version={version}, format={fmt}"
            )
            success_params[version].append(fmt)

    def is_ofxv2(item) -> bool:
        # ``item`` is a (version, formats) pair; versions from 200 up are v2.
        return item[0] >= 200

    v1_items, v2_items = utils.partition(is_ofxv2, success_params.items())
    v1_result = collate_scan_results(v1_items)
    v2_result = collate_scan_results(v2_items)

    # V2 always has closing tags for elements; just report prettyprint
    for v2_format in v2_result["formats"]:
        assert not v2_format["unclosedelements"]
        del v2_format["unclosedelements"]

    results = (v1_result, v2_result, signoninfo)
    logger.info(f"Scan results: {results}")
    return results
def _convert(cls, elem: ET.Element) -> "Aggregate": """ Instantiate from ``xml.etree.ElementTree.Element``. N.B. this method most be called on the appropriate subclass, not the ``Aggregate`` base class. """ if len(elem) == 0: return cls() # Hook to modify incoming ``ET.Element`` before conversion elem = cls.groom(elem) spec = list(cls.spec) listitems = cls.listitems listelements = cls.listelements def extractArgs( elem: ET.Element) -> Tuple[Tuple[str, Any], Tuple[int, Any]]: """ Transform input ET.Element into attribute name/ value pairs ready to pass to Aggregate.__init__(), as well as a sequence check. """ key = elem.tag.lower() try: index = spec.index(key) except ValueError: clsnm = cls.__name__ raise OFXSpecError( f"{clsnm}.spec = {spec}; does not contain {key}") if key in cls.unsupported: value: Optional[Union[str, Aggregate]] = None elif elem.text: # Element - extract raw text string; it will be type converted # when used to set an Aggregate class attribute value = elem.text else: # Aggregate - perform type conversion value = Aggregate.from_etree(elem) return (key, value), (index, key in listitems or key in listelements) def outOfOrder(index0: Tuple[int, bool], index1: Tuple[int, bool]) -> bool: """ Do SubElements appear not in the order defined by SubClass.spec? """ idx0, isListItem0 = index0 idx1, isListItem1 = index1 # Relative order of ListItems/Elements doesn't matter, but position of # ListItems/Elements relative to non-ListItems/Elements (and that of # non-ListItems/Elements relative to other non-ListItems/Elements) # does matter. 
return idx1 <= idx0 and (not isListItem0 or not isListItem1) args_, specIndices = zip(*[extractArgs(subelem) for subelem in elem]) clsnm = cls.__name__ logger.debug(f"Args to instantiate {clsnm}: {args_}") if any([ outOfOrder(index0, index1) for index0, index1 in pairwise(specIndices) ]): subels = [el.tag for el in elem] raise OFXSpecError(f"{clsnm} SubElements out of order: {subels}") kwargs, args = partition( lambda p: p[0] in listitems or p[0] in listelements, args_) return cls(*[arg[1] for arg in args], **dict(kwargs))
def scan_profile(url, org, fid, timeout=None, max_workers=None):
    """
    Report permutations of OFX version/prettyprint/unclosed_elements that
    successfully download OFX profile from server.

    Args:
        url: OFX server URL, passed to ``OFXClient``.
        org: passed to ``OFXClient``.
        fid: passed to ``OFXClient``.
        timeout: per-request timeout handed to ``request_profile``;
            defaults to 5 when ``None``.
        max_workers: size of the thread pool issuing the requests;
            defaults to 5 when ``None``.

    Returns:
        A JSON string encoding a pair of (OFXv1 results, OFXv2 results).
        Each result is an object with keys ``"versions"`` (sorted list of
        OFX versions that connected) and ``"formats"`` (list of format
        objects).  OFXv1 formats carry ``"pretty"`` and
        ``"unclosed_elements"``; OFXv2 formats carry only ``"pretty"``.
    """
    if timeout is None:
        timeout = 5
    if max_workers is None:
        max_workers = 5

    ofxv1 = [102, 103, 151, 160]
    ofxv2 = [200, 201, 202, 203, 210, 211, 220]

    # Maps each submitted Future to the request parameters it was made with:
    # (version, prettyprint, close_elements).
    futures = {}
    client = OFXClient(url, org, fid)
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max_workers
    ) as executor:
        for prettyprint in (False, True):
            for close_elements in (False, True):
                futures.update({
                    executor.submit(
                        client.request_profile,
                        version=version,
                        prettyprint=prettyprint,
                        close_elements=close_elements,
                        timeout=timeout,
                    ): (version, prettyprint, close_elements)
                    for version in ofxv1
                })

            # OFXv2 is only ever requested with closed elements, so it is
            # submitted once per prettyprint setting.
            futures.update({
                executor.submit(
                    client.request_profile,
                    version=version,
                    prettyprint=prettyprint,
                    close_elements=True,
                    timeout=timeout,
                ): (version, prettyprint, True)
                for version in ofxv2
            })

    # version -> list of (prettyprint, close_elements) that connected
    working = defaultdict(list)

    for future in concurrent.futures.as_completed(futures):
        try:
            # Only success/failure matters here; the response is not used.
            future.result()
        except (urllib.error.URLError,
                urllib.error.HTTPError,
                ConnectionError,
                OSError):
            # This permutation does not work against the server; skip it.
            continue
        version, prettyprint, close_elements = futures[future]
        working[version].append((prettyprint, close_elements))

    def collate_results(results):
        """
        Turn (version, [(prettyprint, close_elements), ...]) pairs into a
        2-tuple of (sorted versions, list of format dicts).
        """
        results = list(results)
        if not results:
            return [], []
        versions, formats = zip(*results)

        # Assumption: the same formatting requirements apply to all
        # sub-versions (e.g. 1.0.2 and 1.0.3, or 2.0.3 and 2.2.0).
        # If a (pretty, close_elements) pair succeeds on most sub-versions
        # but fails on a few, we'll chalk it up to network transmission
        # errors and ignore it.
        #
        # Translation: just pick the longest sequence of successful
        # formats and assume it applies to the whole version.
        longest = max(formats, key=len)
        # sorted() rather than .sort() so the list held in ``working`` is
        # left untouched.
        collated = [
            OrderedDict([("pretty", pretty),
                         ("unclosed_elements", not closed)])
            for pretty, closed in sorted(longest)
        ]
        return sorted(versions), collated

    # partition() hands back (items failing the predicate, items passing it)
    v2, v1 = utils.partition(lambda pair: pair[0] < 200, working.items())
    v1_versions, v1_formats = collate_results(v1)
    v2_versions, v2_formats = collate_results(v2)

    # V2 always has closing tags for elements; just report prettyprint
    for fmt in v2_formats:
        del fmt["unclosed_elements"]

    return json.dumps((
        OrderedDict([("versions", v1_versions), ("formats", v1_formats)]),
        OrderedDict([("versions", v2_versions), ("formats", v2_formats)]),
    ))
def _scan_profile(
    url: str,
    org: str,
    fid: str,
    max_workers: Optional[int] = None,
    timeout: Optional[float] = None,
) -> Tuple[ScanResult, ScanResult, Mapping[str, bool]]:
    """
    Report permutations of OFX version/prettyprint/unclosedelements that
    successfully download OFX profile from server.

    Returns a 3-tuple of (OFXv1 results, OFXv2 results, signoninfo), each
    type(dict).  OFX results provide ``ofxget`` configs that will work to
    make a basic OFX connection.  SIGNONINFO provides further auth
    information that may be needed to successfully log in; it is taken from
    the highest OFX version for which it could be extracted, and is empty
    if it could not be extracted from any response.

    ``timeout`` defaults to 5.0 and ``max_workers`` to 5 when ``None``.
    """
    if timeout is None:
        timeout = 5.0

    if max_workers is None:
        max_workers = 5

    ofxv1 = [102, 103, 151, 160]
    ofxv2 = [200, 201, 202, 203, 210, 211, 220]

    # Maps each submitted Future to the request parameters it was made with:
    # (version, prettyprint, close_elements).
    futures = {}
    client = OFXClient(url, org=org, fid=fid)
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max_workers
    ) as executor:
        for prettyprint in (False, True):
            for close_elements in (False, True):
                futures.update({
                    executor.submit(
                        client.request_profile,
                        version=version,
                        prettyprint=prettyprint,
                        close_elements=close_elements,
                        timeout=timeout,
                    ): (version, prettyprint, close_elements)
                    for version in ofxv1
                })

            # OFXv2 is only ever requested with closed elements, so it is
            # submitted once per prettyprint setting.
            futures.update({
                executor.submit(
                    client.request_profile,
                    version=version,
                    prettyprint=prettyprint,
                    close_elements=True,
                    timeout=timeout,
                ): (version, prettyprint, True)
                for version in ofxv2
            })

    # The main thing we're measuring here is success (the request returned
    # a response) or failure (the request threw any of various errors).
    # The data we keep is the metadata (i.e. connection parameters like OFX
    # version tried for a request) stored as values in the ``futures`` dict.
    working: MutableMapping[int, List[Tuple[bool, bool]]] = defaultdict(list)
    # version -> SIGNONINFO flags; only populated on successful extraction.
    signoninfos: MutableMapping[int, Any] = {}
    bool_attrs = (
        "chgpinfirst",
        "clientuidreq",
        "authtokenfirst",
        "mfachallengefirst",
    )

    for future in concurrent.futures.as_completed(futures):
        try:
            response = future.result()
        except (URLError, HTTPError, ConnectionError, OSError):
            # This permutation does not work against the server; skip it.
            continue

        version, prettyprint, close_elements = futures[future]
        working[version].append((prettyprint, close_elements))

        if version in signoninfos:
            # Already have SIGNONINFO for this version; don't parse again.
            continue

        # ``response`` is an HTTPResponse; doesn't have seek() method used
        # by ``header.parse_header()``.  Repackage as BytesIO for parsing.
        try:
            parsed = extract_signoninfos(BytesIO(response.read()))
            # SIGNONINFO is a best-effort extra: a response without any is
            # skipped explicitly instead of aborting the scan on a failed
            # ``assert`` (or an IndexError under ``python -O``).
            if parsed:
                info = parsed[0]
                signoninfos[version] = OrderedDict([
                    (attr, getattr(info, attr, None) or False)
                    for attr in bool_attrs
                ])
        except ValueError:
            # Could not extract SIGNONINFO from this response; a later
            # response for the same version gets another try.
            pass

    if signoninfos:
        signoninfo = signoninfos[max(signoninfos)]
    else:
        signoninfo = OrderedDict()

    # NOTE(review): the ``results`` annotation describes a single item; the
    # calls below pass an iterable of (version, formats) pairs - confirm and
    # tighten once the right typing names are known to be imported.
    def collate_results(
        results: Tuple[int, Tuple[bool, bool]]
    ) -> Tuple[List[int], List[MutableMapping[str, bool]]]:
        """
        Transform our metadata results (version, prettyprint, close_elements)
        into a 2-tuple of ([OFX version], [format]) where each format is a
        dict of {"pretty": bool, "unclosedelements": bool} representing
        a pair of configs that should successfully connect for those
        versions.

        Input ``results`` needs to be a complete set for either OFXv1 or v2,
        with no results for the other version admixed.
        """
        results_ = list(results)
        if not results_:
            return [], []
        versions, formats = zip(*results_)  # type: ignore

        # Assumption: the same formatting requirements apply to all
        # sub-versions (e.g. 1.0.2 and 1.0.3, or 2.0.3 and 2.2.0).
        # If a (pretty, close_elements) pair succeeds on most sub-versions
        # but fails on a few, we'll chalk it up to network transmission
        # errors and ignore it.
        #
        # Translation: just pick the longest sequence of successful
        # formats and assume it applies to the whole version.
        longest = max(formats, key=len)
        # sorted() rather than .sort() so the list held in ``working`` is
        # left untouched.
        collated = [
            OrderedDict([("pretty", pretty),
                         ("unclosedelements", not closed)])
            for pretty, closed in sorted(longest)
        ]
        return sorted(versions), collated

    # partition() hands back (items failing the predicate, items passing it)
    v2, v1 = utils.partition(lambda result: result[0] < 200, working.items())
    v1_versions, v1_formats = collate_results(v1)
    v2_versions, v2_formats = collate_results(v2)

    # V2 always has closing tags for elements; just report prettyprint.
    # The assert checks our own invariant: v2 requests above are only ever
    # submitted with close_elements=True.
    for fmt in v2_formats:
        assert not fmt["unclosedelements"]
        del fmt["unclosedelements"]

    return (
        OrderedDict([("versions", v1_versions), ("formats", v1_formats)]),
        OrderedDict([("versions", v2_versions), ("formats", v2_formats)]),
        signoninfo,
    )