def splitter(
    self, buffer: typ.Text
) -> typ.Optional[typ.Tuple[typ.Dict[str, typ.Any], typ.Text]]:
    """
    Pull the first complete JSON value off the front of ``buffer``.

    The buffer is stripped of surrounding whitespace before decoding.

    :param buffer: text that may start with a complete JSON value.
    :return: ``(value, remainder)`` where ``remainder`` is the stripped
        buffer after the decoded value and any whitespace following it,
        or ``None`` when ``self._decoder.raw_decode`` raises ``ValueError``
        (for example on an empty or truncated buffer).
    """
    text = buffer.strip()
    try:
        decoded, stop = self._decoder.raw_decode(text)
    except ValueError:
        # Nothing decodable at the front of the buffer yet.
        return None

    # Skip the whitespace separating this value from whatever follows it.
    resume_at = json.decoder.WHITESPACE.match(text, stop).end()
    return decoded, text[resume_at:]
def addParameter(self, parameter: tg.Text, value: tg.Text) -> None:
    """
    Record a query parameter and update the flags that depend on it.

    Values that are empty after stripping whitespace are ignored entirely:
    nothing is stored and no flag is touched.

    :param parameter: name of the query parameter.
    :param value: raw value; stored with surrounding whitespace removed.
    """
    cleaned = value.strip()
    if not cleaned:
        return

    self.parameters[parameter] = cleaned

    # At least one usable query criterion has now been supplied.
    self.queryProvided = True

    # Parameter-specific flags.
    if parameter == 'article_number':
        self.usingArticleNumber = True
    elif parameter == 'boolean_text':
        self.usingBoolean = True
    elif parameter in ('facet', 'd-au', 'd-year', 'd-pubtype', 'd-publisher'):
        self.usingFacet = True
def unp4k(
    p4k_file: typing.Text,
    output: typing.Text = ".",
    file_filter: typing.Text = "*",
    convert_cryxml: bool = False,
    single: bool = False,
):
    """
    Extract files matching ``file_filter`` from a P4K archive.

    :param p4k_file: path of the P4K archive to open.
    :param output: destination path. In multi-file mode it is the directory
        to extract into. In ``single`` mode it is the target file name,
        unless it is an existing directory (or has no final component), in
        which case the file is extracted into that directory.
    :param file_filter: filter handed to the archive's search; one layer of
        surrounding single and double quotes is removed first.
    :param convert_cryxml: forwarded unchanged to the archive's extract calls.
    :param single: extract only the first file matching the filter.

    Exits the process with status 1 when the archive is not a file (or when
    opening it is interrupted), and with status 2 when ``single`` is set and
    nothing matches the filter.
    """
    output = Path(output).absolute()
    p4k_file = Path(p4k_file)
    file_filter = file_filter.strip("'").strip('"')

    if not p4k_file.is_file():
        sys.stderr.write(f"Could not open p4k file {p4k_file}\n")
        sys.exit(1)

    print(f"Opening p4k file: {p4k_file}")
    try:
        p = p4k.P4KFile(str(p4k_file))
    except KeyboardInterrupt:
        sys.exit(1)

    if single:
        print(f"Extracting first match for filter '{file_filter}' to {output}")
        print("=" * 80)
        found_files = p.search(file_filter)
        if not found_files:
            sys.stderr.write(f"No files found for filter '{file_filter}'\n")
            sys.exit(2)

        extract_file = found_files[0]
        print(f"Extracting {extract_file.filename}")

        # An absolute path almost always has a name, so the name alone cannot
        # tell "write to this file" from "extract into this directory" (the
        # default "." resolves to the current directory). Treat an existing
        # directory as a directory instead of trying to open it as a file.
        if output.name and not output.is_dir():
            # Given an output name - use it instead of the name in the P4K.
            # Note: this raw copy does not apply ``convert_cryxml``.
            output.parent.mkdir(parents=True, exist_ok=True)
            with p.open(extract_file) as source, open(str(output), "wb") as target:
                shutil.copyfileobj(source, target)
        else:
            output.mkdir(parents=True, exist_ok=True)
            p.extract(extract_file, path=str(output), convert_cryxml=convert_cryxml)
    else:
        print(f"Extracting files into {output} with filter '{file_filter}'")
        print("=" * 80)
        output.mkdir(parents=True, exist_ok=True)
        try:
            p.extract_filter(
                file_filter=file_filter,
                path=str(output),
                convert_cryxml=convert_cryxml,
            )
        except KeyboardInterrupt:
            # Deliberate: Ctrl-C stops the extraction quietly, keeping
            # whatever has been written so far.
            pass
def unforge(
    forge_file: typing.Text,
    file_filter: typing.Text = "*",
    output: typing.Text = ".",
    single: bool = False,
):
    """
    Dump records of a DataForge file as JSON files.

    :param forge_file: path of the DataForge file to open.
    :param file_filter: filter handed to ``search_filename``; one layer of
        surrounding single and double quotes is removed first.
    :param output: destination path. In multi-record mode each record is
        written beneath it at the record's own filename. In ``single`` mode
        it is the target file name, unless it is an existing directory (or
        has no final component), in which case the record's filename is
        appended to it.
    :param single: dump only the first record matching the filter.

    Exits the process with status 1 when ``forge_file`` is not a file, and
    with status 2 when ``single`` is set and nothing matches the filter.
    """
    forge_file = Path(forge_file)
    output = Path(output).absolute()
    file_filter = file_filter.strip("'").strip('"')

    if not forge_file.is_file():
        sys.stderr.write(f"Could not open DataForge file {forge_file}\n")
        sys.exit(1)

    print(f"Opening DataForge file: {forge_file}")
    dcb = forge.DataCoreBinary(str(forge_file))

    if single:
        print(f"Extracting first match for filter '{file_filter}' to {output}")
        print("=" * 120)
        records = dcb.search_filename(file_filter)
        if not records:
            sys.stderr.write(f"No files found for filter '{file_filter}'\n")
            sys.exit(2)

        record = records[0]
        print(f"Extracting {record.filename}")

        # An absolute path almost always has a name, so also treat an
        # existing directory (such as the default ".") as "no file name
        # given"; otherwise the directory itself would be opened as a file.
        if not output.name or output.is_dir():
            output = output / Path(record.filename)
        output.parent.mkdir(parents=True, exist_ok=True)

        # Text mode, matching the multi-record branch below: the JSON dump is
        # written there through a text handle, so a binary handle would
        # reject it.
        with open(str(output), "w") as target:
            target.writelines(dcb.dump_record_json(record))
    else:
        print(f"Extracting files into {output} with filter '{file_filter}'")
        print("=" * 120)
        for record in dcb.search_filename(file_filter):
            record_output = output / Path(record.filename)
            record_output.parent.mkdir(parents=True, exist_ok=True)
            print(str(record_output))
            with open(str(record_output), "w") as target:
                target.writelines(dcb.dump_record_json(record))
def unforge(
    forge_file: typing.Text,
    file_filter: typing.Text = "*",
    output: typing.Text = ".",
    single: bool = False,
):
    """
    Dump records of a DataForge file as JSON.

    :param forge_file: path of the DataForge file to open.
    :param file_filter: filter handed to ``search_filename``; one layer of
        surrounding single and double quotes is removed first.
    :param output: destination path, made absolute, or ``'-'`` to write the
        JSON of a ``single`` record to standard output.
    :param single: dump only the first record matching the filter.

    Exits the process with status 1 when ``forge_file`` is not a file, and
    with status 2 when ``single`` is set and nothing matches the filter.
    """
    forge_file = Path(forge_file)
    to_stdout = output == '-'
    output = output if to_stdout else Path(output).absolute()
    file_filter = file_filter.strip("'").strip('"')

    # When the JSON itself goes to stdout, keep progress messages out of that
    # stream so the output stays parseable; otherwise print as usual.
    status = sys.stderr if to_stdout else sys.stdout

    if not forge_file.is_file():
        sys.stderr.write(f"Could not open DataForge file {forge_file}\n")
        sys.exit(1)

    print(f"Opening DataForge file: {forge_file}", file=status)
    dcb = forge.DataCoreBinary(str(forge_file))

    if single:
        print(
            f"Extracting first match for filter '{file_filter}' to {output}",
            file=status,
        )
        print("=" * 120, file=status)
        records = dcb.search_filename(file_filter)
        if not records:
            sys.stderr.write(f"No files found for filter '{file_filter}'\n")
            sys.exit(2)

        record = records[0]
        print(f"Extracting {record.filename}", file=status)
        if to_stdout:
            sys.stdout.write(dcb.dump_record_json(record))
        else:
            _dump_record(dcb, record, output)
    else:
        print(
            f"Extracting files into {output} with filter '{file_filter}'",
            file=status,
        )
        print("=" * 120, file=status)
        # NOTE(review): with output == '-' the plain string is passed on to
        # _dump_record unchanged, as before - confirm that helper handles it.
        for record in dcb.search_filename(file_filter):
            _dump_record(dcb, record, output)
def _storage_not_supported(_uri: t.Text) -> t.Text: """ Helper function to guard against unsupported storage. """ _uri = _uri.strip() if _uri.startswith('storage:'): raise NotImplementedError(f"Storage protocol (uri={_uri}) is not supported yet.") return _uri