def __init__(
    self,
    file_path: str,
    study_id: ObjectId,
    study_file_id: ObjectId,
    name: str,
    *,
    domain_ranges: Dict = None,
    **kwargs,
):
    Annotations.__init__(
        self, file_path, self.ALLOWED_FILE_TYPES, study_id, study_file_id
    )
    # Lowercase coordinate headers, expected for df merge
    for i, header in enumerate(self.headers):
        if header in ["X", "Y", "Z"]:
            self.headers[i] = self.headers[i].lower()
    self.preprocess()
    self.determine_coordinates_and_cell_names()
    self.source_file_type = "cluster"
    self.cluster_type = (
        "3d"
        if "z" in self.coordinates_and_cell_headers
        or "Z" in self.coordinates_and_cell_headers
        else "2d"
    )
    self.name = name
    # Treat an empty dict the same as None
    self.domain_ranges = domain_ranges if domain_ranges else None
def test_round_trip_annotations():
    a = Annotations(
        foo=1234,
        zoo=[123.1, 456.2, 789.3],
        species='Moose',
        birthdays=[datetime(1969, 4, 28), datetime(1973, 12, 8), datetime(2008, 1, 3)],
    )
    sa = a.toSynapseAnnotations()
    a2 = Annotations.fromSynapseAnnotations(sa)
    # Verify the values survive the round trip (values come back as lists)
    assert a2['foo'] == [1234]
    assert a2['zoo'] == [123.1, 456.2, 789.3]
    assert a2['species'] == ['Moose']
    assert a2['birthdays'] == [datetime(1969, 4, 28), datetime(1973, 12, 8), datetime(2008, 1, 3)]
def test_merge_df(self):
    cluster = Clusters(
        "../tests/data/test_1k_cluster_data.csv",
        "dec0dedfeed1111111111111",
        "addedfeed000000000000000",
        "testCluster",
    )
    cell_metadata_df = Annotations(
        self.CELL_METADATA_PATH,
        ["text/csv", "text/plain", "text/tab-separated-values"],
    )
    cell_metadata_df.preprocess()
    cell_names_cell_metadata_df = np.asarray(cell_metadata_df.file["NAME"])
    cell_names_cluster_df = np.asarray(cluster.file["NAME"])
    # Cell names found in both cluster and metadata files
    common_cell_names = cell_names_cluster_df[
        np.isin(cell_names_cluster_df, cell_names_cell_metadata_df)
    ]
    # Perform merge
    cluster.merge_df(cluster.file[["NAME", "x", "y", "z"]], cell_metadata_df.file)
    # Ensure ONLY cell names found in both the cell metadata file and the
    # cluster file are in the newly merged df
    result = all(
        cell[0] in common_cell_names for cell in cluster.file["NAME"].values
    )
    self.assertTrue(
        result,
        "Merge was not performed correctly. Merge should be performed on 'NAME'",
    )
def test_validate_numeric_annots(self):
    cluster = Annotations(
        "../tests/data/cluster_bad_missing_coordinate.txt",
        TestAnnotations.ALLOWED_FILE_TYPES,
    )
    cluster.create_data_frame()
    # Call the validator; asserting on the bound method itself would always pass
    self.assertTrue(cluster.validate_numeric_annots())
def test_coerce_numeric_values(self):
    cm = Annotations(
        "../tests/data/metadata_example.txt",
        ["text/csv", "text/plain", "text/tab-separated-values"],
    )
    cm.create_data_frame()
    cm.file = Annotations.coerce_numeric_values(cm.file, cm.annot_types)
    dtype = cm.file.dtypes[("Average Intensity", "numeric")]
    self.assertEqual(dtype, np.float64)

    # Test that numeric values were rounded to three or fewer decimal places.
    # Pick a random row between 1 and the number of lines in the file
    ran_num = random.randint(1, 20)
    for column in cm.file.columns:
        annot_type = column[1]
        if annot_type == "numeric":
            value = str(cm.file[column][ran_num])
            assert (
                abs(Decimal(value).as_tuple().exponent) <= self.EXPONENT
            ), "Numbers did not round to 3 or fewer decimal places"

    # Test for string in numeric column
    cm_has_bad_value = Annotations(
        "../tests/data/metadata_bad_contains_str_in_numeric_column.txt",
        ["text/csv", "text/plain", "text/tab-separated-values"],
    )
    cm_has_bad_value.create_data_frame()
    self.assertRaises(
        ValueError,
        Annotations.coerce_numeric_values,
        cm_has_bad_value.file,
        cm_has_bad_value.annot_types,
    )
def test_low_mem_artifact(self):
    # pandas' default of low_memory=True allows internal chunking during
    # parsing, causing inconsistent dtype coercion artifacts for larger
    # annotation files
    lmtest = Annotations(
        "../tests/data/low_mem_unit.txt",
        ["text/csv", "text/plain", "text/tab-separated-values"],
    )
    lmtest.preprocess()
    # With low_memory=True, the first row in the file would fall in the first
    # chunk and its numeric value was not properly coerced to a string
    assert isinstance(
        lmtest.file["mixed_data"]["group"][0], str
    ), "numeric value should be coerced to string"
    # Per SCP-2545, NA values become strings for group annotations
    assert isinstance(
        lmtest.file["mixed_data"]["group"][2], str
    ), "expect empty cell converted to NaN to be a string for group annotations"
    # A numeric value in the second chunk should still be coerced to string type
    assert isinstance(
        lmtest.file["mixed_data"]["group"][32800], str
    ), "numeric value should be coerced to string"
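# Hedged sketch of the low_memory artifact the test above guards against;
# "mixed.csv" and the "mixed_data" column are hypothetical. With the pandas
# default low_memory=True, read_csv parses in chunks and can infer a different
# dtype for each chunk of a mixed-type column; low_memory=False (or an
# explicit dtype) keeps the inference consistent.
import pandas as pd

df = pd.read_csv("mixed.csv", low_memory=False)           # single-pass dtype inference
df = pd.read_csv("mixed.csv", dtype={"mixed_data": str})  # or pin the dtype up front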
def test_annotations():
    a = Annotations(foo='bar', zoo=['zing', 'zaboo'], species='Platypus')
    sa = a.toSynapseAnnotations()
    assert sa['stringAnnotations']['foo'] == ['bar']
    assert sa['stringAnnotations']['zoo'] == ['zing', 'zaboo']
    assert sa['stringAnnotations']['species'] == ['Platypus']
def test_more_annotations():
    a = Annotations(
        foo=1234,
        zoo=[123.1, 456.2, 789.3],
        species='Platypus',
        birthdays=[datetime(1969, 4, 28), datetime(1973, 12, 8), datetime(2008, 1, 3)],
    )
    sa = a.toSynapseAnnotations()
    assert sa['longAnnotations']['foo'] == [1234]
    assert sa['doubleAnnotations']['zoo'] == [123.1, 456.2, 789.3]
    assert sa['stringAnnotations']['species'] == ['Platypus']
    bdays = [datetime.utcfromtimestamp(t) for t in sa['dateAnnotations']['birthdays']]
    assert bdays == [datetime(1969, 4, 28), datetime(1973, 12, 8), datetime(2008, 1, 3)]
def parse_and_generate(filename, out_filename=None, init_filename=None,
                       include_paths=[], defines=[]):
    """
    Parse the file at filename.

    If out_filename and init_filename are None, return a tuple containing the
    generated source code for each; otherwise write the files and return a
    tuple of the generated files' names.
    """
    from os import path
    if out_filename:
        out_filename = re.sub(FILENAME_EXPR, out_filename, filename)
    if init_filename:
        init_filename = re.sub(FILENAME_EXPR, init_filename, filename)

    rel_filename = ''
    if out_filename is None and init_filename is None:
        rel_filename = re.sub(FILENAME_EXPR, r'\g<basename>.h', path.basename(filename))
    else:
        init_dir = path.dirname(init_filename)
        rel_filename = path.relpath(out_filename, init_dir)

    ast, text = parse_jstruct(filename, include_paths=include_paths, defines=defines)
    annotations = Annotations(text)
    try:
        annotations.expand(ast, '<stdin>')
    except ExpansionError as ex:
        ex.filename = filename
        raise
    prune_ast(ast, '<stdin>')
    out_ast, init_ast = split_ast(ast)

    generator = CGenerator()
    out_result = generator.visit(out_ast)
    init_result = generator.visit(init_ast)

    if GUARD_HEADERS_EXPR.search(out_result):
        out_result = re.sub(
            GUARD_HEADERS_EXPR, r'\g<0>' + GENERATED, out_result, count=1
        ) + '\n#endif\n'
    else:
        out_result = GENERATED + out_result
    init_result = re.sub(GUARD_HEADERS_EXPR, '', init_result)
    init_instructions = (
        INIT_INSTRUCTIONS if init_filename and init_filename.endswith('.h') else ''
    )
    init_result = GENERATED1NL + init_instructions + INCLUDE_H(rel_filename) + init_result

    if out_filename:
        with open(out_filename, 'w') as out_file:
            out_file.write(out_result)
    if init_filename:
        with open(init_filename, 'w') as init_file:
            init_file.write(init_result)

    if out_filename is None and init_filename is None:
        return (out_result, init_result)
    else:
        return (out_filename, init_filename)
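# Hedged usage sketch for parse_and_generate above; 'point.jstruct' and the
# replacement patterns are hypothetical, though FILENAME_EXPR's <basename>
# group is implied by the r'\g<basename>.h' substitution inside the function.
# With filenames given, the generated files are written and their names
# returned; with neither, the generated source strings come back instead.
out_name, init_name = parse_and_generate(
    'point.jstruct',
    out_filename=r'\g<basename>.h',
    init_filename=r'\g<basename>_init.c',
)
out_src, init_src = parse_and_generate('point.jstruct')  # in-memory variant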
def test_mixed_annotations():
    """Test that toSynapseAnnotations will coerce a list of mixed types to strings"""
    a = Annotations(foo=[1, 'a', datetime(1969, 4, 28, 11, 47)])
    sa = a.toSynapseAnnotations()
    a2 = Annotations.fromSynapseAnnotations(sa)
    assert a2['foo'][0] == '1'
    assert a2['foo'][1] == 'a'
    assert a2['foo'][2].find('1969') > -1
def test_leading_zeros(self):
    """Ensures leading zeros are not stripped from group annotations"""
    path = "../tests/data/metadata_convention_with_leading_0s.tsv"
    annotation = Annotations(
        path, ["text/csv", "text/plain", "text/tab-separated-values"]
    )
    annotation.preprocess()
    # Grab a value from the donor_id column
    value_with_leading_zeros = annotation.file.iloc[
        :, annotation.file.columns.get_level_values(0) == "donor_id"
    ].values.item(0)
    self.assertTrue(value_with_leading_zeros.startswith("0"))
def test_duplicate_headers(self):
    """Annotation headers should not contain duplicate values"""
    dup_headers = Annotations(
        "../tests/data/dup_headers_v2.0.0.tsv",
        ["text/csv", "text/plain", "text/tab-separated-values"],
    )
    self.assertFalse(
        dup_headers.validate_unique_header(),
        "Duplicate headers should fail format validation",
    )
    with self.assertRaises(ValueError):
        dup_headers.preprocess()
def main():
    r"""
    Runs trcls.
    """
    parser = cli.get_parser()
    args = parser.parse_args()
    logger = setup_logging(args)

    if args.version:
        print('trcls {}'.format(VERSION))
        exit(0)

    if args.alignment is None or args.features is None:
        logger.error('Both SAM alignment and GTF annotation files must be provided')
        parser.print_help()
        exit(1)

    with open(args.features) as features_file:
        annotations = Annotations(features_file)
    with open(args.alignment) as alignment_file:
        alignments = alignment_file.readlines()

    headers = filter(lambda l: l.startswith('@'), alignments)
    headers = map(str.strip, headers)
    alignments = filter(lambda l: not l.startswith('@'), alignments)
    transcripts = get_transcripts(alignments, args.skip_tolerance, args.map_tolerance)

    print('\n'.join(headers))
    for transcript in transcripts:
        transcript.annotate(annotations, args.junction_tolerance)
        print(transcript)
def apply(binary_path: str, sig_path: str) -> Tuple[int, str]:
    """
    Applies signatures in the specified file to the specified binary, and
    writes the resulting bndb to disk.

    :param binary_path: path of binary to apply signatures to
    :param sig_path: path of signature file to read in
    :return: tuple (int count of function signatures matched, str path to BNDB with tags that was created)
    """
    bv = binja.BinaryViewType.get_view_of_file(binary_path)
    print("Loaded binary {} into Binary Ninja.".format(binary_path))
    functions = hash_all(bv)
    print("{} functions in binary have been hashed.".format(len(functions)))

    data = read_json(sig_path)
    signatures = {}
    for raw_hash in data:
        # Only bother with functions that actually have tags
        if len(data[raw_hash]) > 0:
            signatures[raw_hash] = Annotations(raw_data=data[raw_hash])
    print("Signature file {} loaded into memory.".format(sig_path))

    num_func_sigs_applied = 0
    for function_hash in functions:
        if function_hash in signatures:
            tag_function(bv, functions[function_hash], function_hash, signatures)
            print('Located a match at {}!'.format(function_hash))
            num_func_sigs_applied += 1

    output_bndb = os.path.join(os.getcwd(), binary_path + '.bndb')
    print("Writing output Binary Ninja database at {}".format(output_bndb))
    bv.create_database(output_bndb)
    return num_func_sigs_applied, output_bndb
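# Hedged usage sketch for apply() above; both paths are hypothetical. The
# return value pairs the number of matched function signatures with the path
# of the tagged .bndb database that was written.
matched, bndb_path = apply('example_binary', 'example_sigs.json')
print('{} signatures applied; database written to {}'.format(matched, bndb_path))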
def _getAnnotations(self, entity):
    entity_id = entity['id'] if 'id' in entity else str(entity)
    url = '%s/entity/%s/annotations' % (self.repoEndpoint, entity_id)
    response = requests.get(url, headers=self.headers)
    response.raise_for_status()
    return Annotations.fromSynapseAnnotations(response.json())
def test_create_columns(self):
    header = ["Intensity", "donor_id", "species__ontology_label"]
    annotation_types = ["numeric", "group", "group"]
    columns = Annotations.create_columns(header, annotation_types)
    expected = [
        ("Intensity", "numeric"),
        ("donor_id", "group"),
        ("species__ontology_label", "group"),
    ]
    self.assertEqual(columns, expected)
def test_convert_header_to_multiIndex(self):
    expected = [
        ("Name", "TYPE"),
        ("X", "numeric"),
        ("Y", "numeric"),
        ("Z", "numeric"),
        ("Average Intensity", "numeric"),
    ]
    path = "../tests/data/good_subsample_cluster.csv"
    annotation = Annotations(
        path, ["text/csv", "text/plain", "text/tab-separated-values"]
    )
    df = annotation.open_file(
        path, open_as="dataframe", skiprows=2, names=annotation.headers
    )[0]
    new_df = Annotations.convert_header_to_multi_index(df, expected)
    # Remove white spaces
    new_df_columns = [tuple(s.strip() for s in y) for y in new_df.columns]
    self.assertEqual(new_df_columns, expected)
def tosling(self, filename):
    documents = []
    annotations = Annotations(self)
    input_stats = self.summary.input

    # Callback that will be invoked for each SLING document that is built.
    # This could be for each sentence or each document part, as specified.
    def callback(document):
        documents.append(document)

    with open(filename, "r") as f:
        input_stats.files.increment()
        lines = f.readlines()
        for line in lines:
            annotations.read(line, callback)

    for document in documents:
        self._add_output_statistics(document)
    return documents
def __init__(
    self,
    file_path: str,
    study_id: ObjectId,
    study_file_id: ObjectId,
    *args,
    **kwargs,
):
    self.study_accession = kwargs.pop("study_accession")
    Annotations.__init__(
        self, file_path, self.ALLOWED_FILE_TYPES, study_id, study_file_id
    )
    self.cell_names = []
    # The lambda below initializes a new key with a nested dictionary as its
    # value, avoiding KeyError
    self.issues = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    self.ontology = defaultdict(lambda: defaultdict(list))
    self.ontology_label = dict()
    self.cells = []
    self.numeric_array_columns = {}
def test_header_format(self):
    """Header rows of a metadata file should conform to the standard"""
    error_headers = Annotations(
        "../tests/data/error_headers_v2.0.0.tsv",
        ["text/csv", "text/plain", "text/tab-separated-values"],
    )
    self.assertFalse(
        error_headers.validate_header_keyword(),
        "Missing NAME keyword should fail format validation",
    )
    self.assertFalse(
        error_headers.validate_type_keyword(),
        "Missing TYPE keyword should fail format validation",
    )
    self.assertFalse(
        error_headers.validate_type_annotations(),
        "Invalid type annotations should fail format validation",
    )
def _setAnnotations(self, entity, annotations):
    entity_id = entity['id'] if 'id' in entity else str(entity)
    url = '%s/entity/%s/annotations' % (self.repoEndpoint, entity_id)
    a = annotations.toSynapseAnnotations()
    a['id'] = entity_id
    if 'etag' in entity and 'etag' not in a:
        a['etag'] = entity['etag']
    response = requests.put(url, data=json.dumps(a), headers=self.headers)
    response.raise_for_status()
    return Annotations.fromSynapseAnnotations(response.json())
def test_idempotent_annotations():
    """Test that toSynapseAnnotations won't mess up a dictionary that's
    already in synapse-style form"""
    a = Annotations(species='Moose', n=42, birthday=datetime(1969, 4, 28))
    sa = a.toSynapseAnnotations()
    a2 = Annotations()
    a2.update(sa)
    sa2 = a2.toSynapseAnnotations()
    assert sa == sa2
def read_tags(bv: Binary_View, hashes: Dict[str, Function]) -> Dict[str, Annotations]:
    """
    Gathers tag locations from every function in the binary.

    :param bv: BinaryView that contains the analysis results
    :param hashes: a dictionary mapping hashes to their functions
    :return: dictionary representing all tags in the current binary
    """
    tagged_dict = {}
    # TODO: switch to GetAllTagReferences once it's available in the Python
    # API for O(1) access times
    for hash_value in hashes:
        function = hashes[hash_value]
        tagged_dict[hash_value] = Annotations(function=function, bv=bv)
    return tagged_dict
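# Hedged usage sketch for read_tags above, reusing hash_all from the apply()
# snippet earlier; bv is assumed to be an already-analyzed BinaryView.
functions = hash_all(bv)          # {hash: Function}
tags = read_tags(bv, functions)   # {hash: Annotations}
for hash_value, annotations in tags.items():
    print(hash_value, annotations)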
def test_get_cell_names(self):
    import pandas as pd

    expected_cell_names = [
        "CELL_0001",
        " CELL_0002",
        " CELL_0003",
        " CELL_0004",
        " CELL_0005",
        " CELL_0006",
        " CELL_0007",
        " CELL_0008",
        " CELL_0009",
        " CELL_00010",
        " CELL_00011",
        " CELL_00012",
        " CELL_00013",
        " CELL_00014",
        " CELL_00015",
        " CELL_00016",
        " CELL_00017",
        " CELL_00018",
        " CELL_00019",
        " CELL_00020",
    ]
    column_names = [
        ("NAME", "TYPE"),
        ("Cluster", "group"),
        ("Sub-Cluster", "group"),
        ("Average Intensity", "numeric"),
    ]
    index = pd.MultiIndex.from_tuples(column_names)
    df = pd.read_csv(
        "../tests/data/metadata_example.txt", sep="\t", names=index, skiprows=2
    )
    cells = Annotations.get_cell_names(df)
    self.assertEqual(cells, expected_cell_names)
class RendererMarkdown:
    def __init__(self, output_dir):
        self.output_dir = Path(output_dir)
        self.annotations = Annotations("annotations-bitcoin-0.18.json")

    def add_version_note(self, page):
        if "added" in self.annotation:
            page.text("*Added in Bitcoin Core %s*\n" % self.annotation["added"])

    def add_wallet_note(self, page):
        if "wallet" in self.annotation:
            if self.annotation["wallet"]:
                page.text("*Requires wallet support.*\n")

    def add_see_also_command(self, page, command):
        name = display_name(command)
        lower_name = uncapitalize(name)
        page.text("* [%s][rpc %s]: {{summary_%s}}" % (name, command, lower_name))

    def add_see_also_glossary(self, page, text, link):
        page.text("* [%s][/en/glossary/%s]" % (text, link))

    def add_see_also_message(self, page, message, text):
        page.text("* [`%s` message][%s message]: %s" % (message, message, text))

    def add_see_also(self, page):
        if "see_also" in self.annotation:
            page.text("*See also*\n")
            see_also = self.annotation["see_also"]
            if "commands" in see_also:
                for command in see_also["commands"]:
                    self.add_see_also_command(page, command)
            if "glossary" in see_also:
                for glossary in see_also["glossary"]:
                    self.add_see_also_glossary(page, glossary[1], glossary[0])
            if "messages" in see_also:
                for message in see_also["messages"]:
                    self.add_see_also_message(page, message[0], message[1])
            page.nl()

    def arg_summary(self, arg):
        return arg["name"]

    def arg_n(self, arg):
        return arg["name"]

    def arg_t(self, arg):
        t = arg["type"].split(", ")[0]
        if t == "numeric":
            t = "number (int)"
        if "args" in self.annotation:
            args = self.annotation["args"]
            if arg["name"] in args:
                arg_annotation = args[arg["name"]]
                if "type" in arg_annotation:
                    t += " (%s)" % arg_annotation["type"]
        return t

    def arg_p(self, arg):
        arg_line = arg["type"].split(", ")
        if len(arg_line) == 1:
            return "Required"
        else:
            p = arg_line[1]
            if p == "required":
                return "Required<br>(exactly 1)"
            elif p == "optional":
                if len(arg_line) == 3:
                    return "Optional<br>" + capitalize(arg_line[2])
                else:
                    return "Optional"
            else:
                return p

    def arg_d(self, arg):
        d = arg["description"]
        if "args" in self.annotation:
            args = self.annotation["args"]
            if arg["name"] in args:
                arg_annotation = args[arg["name"]]
                if "description" in arg_annotation:
                    d += ". " + arg_annotation["description"]
        return d

    def result_t(self, result):
        t = result["type"]
        if t == "numeric":
            t = "number (int)"
        elif t == "string":
            t += " (hex)"
        return t

    def result_null(self):
        return '''*Result---`null` on success*

{% itemplate ntpd1 %}
- n: "`result`"
 t: "null"
 p: "Required<br>(exactly 1)"
 d: "JSON `null` when the command was successful or a JSON with an error field on error."

{% enditemplate %}
'''

    def yaml_escape(self, text):
        return text.replace('"', '\\"')

    def guarded_code_block(self, block):
        return "{% endautocrossref %}\n\n" + self.code_block(block) + "\n{% autocrossref %}\n"

    def code_block(self, block):
        min_indentation = 999
        split_block = block.splitlines()
        for line in split_block:
            indentation = len(line) - len(line.lstrip(" "))
            if indentation < min_indentation:
                min_indentation = indentation
        indented_block = ""
        for line in split_block:
            if min_indentation <= 4:
                indented_block += " " * (4 - min_indentation) + line
            else:
                indented_block += line[min_indentation - 4:]
            indented_block += "\n"
        if not indented_block.endswith("\n"):
            indented_block += "\n"
        return indented_block

    def add_license_header(self, page):
        with page.tag("comment"):
            page.text(
                "This file is licensed under the MIT License (MIT) available on\n"
                "http://opensource.org/licenses/MIT.")

    def split_description(self, full_description):
        if "summary" in self.annotation:
            summary = self.annotation["summary"]
            description = full_description
        elif full_description:
            if "." in full_description:
                summary = uncapitalize(full_description.partition(".")[0]) + "."
                description = full_description[len(summary) + 1:].lstrip()
            else:
                summary = uncapitalize(full_description.rstrip()) + "."
                description = ""
            summary = " ".join(summary.splitlines())
        else:
            summary = "does %s." % display_name(self.command)
            description = None
        return summary, description

    def process_command_help(self, help_data):
        self.help_data = help_data
        self.command = help_data["command"].split(" ")[0]
        self.annotation = self.annotations.annotation(self.command)

        page = Page()
        self.add_license_header(page)

        name = display_name(self.command)
        lower_name = name[0].lower() + name[1:]
        page.tag(
            "assign",
            'filename="_data/devdocs/en/bitcoin-core/rpcs/rpcs/%s.md"' % self.command)

        title = "\n##### %s" % name
        if self.command == "ping":
            title += " {#ping-rpc}"
            suffix = "-rpc"
        else:
            suffix = ""
        page.text(title)
        page.tag("include", "helpers/subhead-links.md")
        page.nl()

        summary, description = self.split_description(help_data["description"])
        page.tag("assign", 'summary_%s%s="%s"' % (lower_name, suffix, summary))
        page.nl()

        with page.tag("autocrossref"):
            page.nl()
            self.add_version_note(page)
            self.add_wallet_note(page)
            page.text("The `%s` RPC {{summary_%s%s}}\n" % (self.command, lower_name, suffix))

            if description:
                quoted = False
                for line in description.splitlines():
                    if line.startswith(" "):
                        if not quoted:
                            page.text("{% endautocrossref %}")
                            page.nl()
                            quoted = True
                    elif quoted:
                        page.nl()
                        page.text("{% autocrossref %}")
                        quoted = False
                    page.text(line)
                if quoted:
                    page.nl()
                    page.text("{% autocrossref %}")
            page.nl()

            if "arguments" in help_data:
                if not help_data["arguments"]:
                    page.text("*Parameters: none*\n")
                else:
                    count = 1
                    for arg in help_data["arguments"]:
                        page.text("*Parameter #%s---%s*\n" % (count, self.arg_summary(arg)))
                        with page.tag("itemplate", "ntpd1"):
                            page.text('- n: "%s"' % self.arg_n(arg))
                            page.text(' t: "%s"' % self.arg_t(arg))
                            page.text(' p: "%s"' % self.yaml_escape(self.arg_p(arg)))
                            page.text(' d: "%s"' % self.yaml_escape(self.arg_d(arg)))
                            page.nl()
                        page.nl()
                        if "literal_description" in arg:
                            page.text(self.guarded_code_block(arg["literal_description"]))
                        count += 1

            if help_data["results"] == [{'title_extension': ''}] or help_data["results"] == []:
                page.text(self.result_null())
            else:
                for result in help_data["results"]:
                    result_header = "*Result"
                    if "title_extension" in result and result["title_extension"]:
                        result_header += "---" + result["title_extension"].lstrip()
                    result_header += "*\n"
                    page.text(result_header)
                    if result["format"] == "literal":
                        page.text(self.guarded_code_block(result["text"]))
                    else:
                        with page.tag("itemplate", "ntpd1"):
                            page.text('- n: "%s"' % "`result`")
                            page.text(' t: "%s"' % self.result_t(result))
                            page.text(' p: "Required<br>(exactly 1)"')
                            page.text(' d: "%s"' % self.yaml_escape(result["description"]))
                            page.nl()
                    page.nl()

            if help_data["examples"]:
                page.text("*Example*\n")
                for example in help_data["examples"]:
                    if example.startswith("> "):
                        if not example.startswith("> curl"):
                            with page.tag("highlight", "bash"):
                                page.text(example[2:].rstrip())
                    else:
                        if (not example.startswith("As json rpc")
                                and not example.startswith("As a json rpc")
                                and not example.startswith("As a JSON-RPC")):
                            page.text(example)
                            page.nl()
                page.nl()

            self.add_see_also(page)

        return page.out

    def render_cmd_page(self, command, help_data):
        command_file = command + ".md"
        with open(self.output_dir / "rpcs" / command_file, "w") as file:
            file.write(self.process_command_help(help_data))

    def add_version_helper_assignment(self, page, type, version, bold=False):
        a = type.upper() + version.replace(".", "_") + "='*"
        if bold:
            a += "*"
        a += '<abbr title="' + type + ' in Bitcoin Core v' + version + '">'
        a += type + ' in ' + version + '</abbr>*'
        if bold:
            a += "*"
        a += "'"
        page.tag("assign", a)

    def add_version_helpers(self, page, version, date, new=False, updated=True, bold=False):
        page.text("<!-- Bitcoin Core %s %s -->" % (version, date))
        if new:
            self.add_version_helper_assignment(page, "New", version, bold=bold)
        if updated:
            self.add_version_helper_assignment(page, "Updated", version, bold=bold)
        page.nl()

    def render_version_info(self, page):
        with page.tag("comment"):
            page.text(
                """Styling notes: use highly-visible style for upcoming changes (not yet
released) and changes made in the last 6 months. Use less-visible style
for changes made up to two years ago. Don't point out changes made more
than two years ago. Use v0.n.n in abbreviation title to prevent
autocrossrefing.""")
        page.nl()
        page.text("<!-- Deprecated -->")
        page.tag(
            "assign",
            "DEPRECATED='**<abbr title=\"Deprecated; will be removed in a future version of Bitcoin Core\">Deprecated</abbr>**'")
        self.add_version_helpers(page, "0.14.1", "April 2017", bold=True)
        self.add_version_helpers(page, "0.14.0", "March 2017", new=True, bold=True)
        self.add_version_helpers(page, "0.13.1", "September 2016")
        self.add_version_helpers(page, "0.13.0", "August 2016", new=True)
        self.add_version_helpers(page, "0.12.1", "April 2016")
        self.add_version_helpers(page, "0.12.0", "February 2016", new=True)
        self.add_version_helpers(page, "0.11.0", "July 2015", new=True, updated=False)

    def render_overview_page(self, all_commands, render_version_info=True):
        with open(self.output_dir / "quick-reference.md", "w") as file:
            page = Page()
            self.add_license_header(page)
            page.tag(
                "assign",
                'filename="_data/devdocs/en/bitcoin-core/rpcs/quick-reference.md"')
            page.nl()
            page.text("#### Quick Reference {#rpc-quick-reference}")
            page.tag("include", "helpers/subhead-links.md")
            page.nl()
            if render_version_info:
                self.render_version_info(page)
            page.text(
                """<!-- the summaries used below are defined in the files for the
particular RPC and aggregated into this helper file by the makefile
function manual-update-summaries-file. For example, to edit the
summary for GetBestBlockHash, edit
_includes/rpc/getbestblockhash.md and run `make manual-update-summaries`.
-->""")
            page.tag("include", "helpers/summaries.md")
            page.nl()

            for category in all_commands:
                page.text("#### " + category + " RPCs")
                page.text("{:.no_toc}")
                page.text("<!-- no subhead-links here -->\n")
                with page.tag("autocrossref"):
                    page.nl()
                    if category == "Wallet":
                        page.text(
                            """**Note:** the wallet RPCs are only available if Bitcoin Core was built
with [wallet support][]{:#term-wallet-support}{:.term}, which is the
default.
""")
                    for command in all_commands[category]:
                        cmd = command.split(" ")[0]
                        item = "* [" + display_name(cmd) + "]"
                        item += "[rpc " + cmd + "]: "
                        item += "{{summary_" + uncapitalize(display_name(cmd))
                        if cmd == "ping":
                            item += "-rpc"
                        item += "}}"
                        if render_version_info:
                            annotation = self.annotations.annotation(cmd)
                            if "added" in annotation:
                                item += " {{NEW%s}}" % annotation["added"].replace(".", "_")
                            if "added" in annotation and "changed" in annotation:
                                item += ","
                            if "changed" in annotation:
                                item += " {{UPDATED%s}}" % annotation["changed"].replace(".", "_")
                            if "deprecated" in annotation:
                                item += " {{DEPRECATED}}"
                        page.text(item)
                    page.nl()
                page.nl()
            file.write(page.out)
def clean_annotations(self, annotations_file):
    annotations = Annotations(annotations_file)
    annotations.clean_annotations()
def import_see_also(self, markdown_dir, annotations_file):
    annotations = Annotations(annotations_file)
    annotations.import_see_also(markdown_dir)
def flna_annotations():
    with open('test/FLNA.gtf') as gtf_file:
        return Annotations(gtf_file)
def generate_lawbook(name):
    ANNOTATIONS = ANNOTATIONS_MAP.get(name, Annotations(list()))
    with open(os.path.join(STATIC_DIR, "%s.html" % name), "w+", encoding="utf-8") as fp:
        fp.write("""<html>
<head>
<title> %s </title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/css/bootstrap.min.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/js/bootstrap.min.js"></script>
<link href="css/gesetze.css" rel="stylesheet" title="Default Style">
</head>
<body>
""" % name)
        data = read_json(name)
        fp.write("<h1>%s</h1>" % name)
        for entry in data:
            if entry["type"] == "section":
                fp.write("<h3>%s</h3>" % entry["title"])
            else:
                paragraph, title = entry["norm"], entry.get("title", "")
                if title is None:
                    title = ""
                anchor = "<a id='#%s'></a>" % entry["norm"]
                fp.write("<div class='norm'>")
                fp.write(
                    "<div class='normhead%s'>%s %s</div> %s"
                    % (" marked" if ANNOTATIONS.is_marked(paragraph) else "",
                       paragraph, title, anchor))
                fp.write("<div class='normtext'>")
                for absatz in entry["paragraphs"]:
                    fp.write("<div class='abs'>%s" % absatz["text"])
                    subs = absatz["sub"]
                    if subs:
                        fp.write("<div class='subbox'>")
                        for i, sub in enumerate(subs):
                            fp.write("<div class='sub'>%d. %s" % (i + 1, sub["text"]))
                            subsubs = sub["sub"]
                            if subsubs:
                                fp.write("<div class='subsubbox'>")
                                letters = lit_gen()
                                for subsub in subsubs:
                                    fp.write(
                                        "<div class='subsub'>%s) %s</div>"
                                        % (next(letters), subsub["text"]))
                                fp.write("</div>")  # .subsubbox
                            fp.write("</div>")  # .sub
                        fp.write("</div>")  # .subbox
                    fp.write("</div>")  # .abs
                fp.write("</div>")  # .normtext
                fp.write("</div>")  # .norm
        fp.write("</body> </html>")
class SubSample(Annotations):
    ALLOWED_FILE_TYPES = ["text/csv", "text/plain", "text/tab-separated-values"]
    MAX_THRESHOLD = 100_000
    SUBSAMPLE_THRESHOLDS = [MAX_THRESHOLD, 20_000, 10_000, 1_000]

    def __init__(self, cluster_file, cell_metadata_file=None):
        Annotations.__init__(self, cluster_file, self.ALLOWED_FILE_TYPES)
        self.preprocess()
        self.determine_coordinates_and_cell_names()
        if cell_metadata_file is not None:
            self.cell_metadata = Annotations(
                cell_metadata_file, CellMetadata.ALLOWED_FILE_TYPES
            )

    @staticmethod
    def has_cells_in_metadata_file(metadata_cells, cluster_cells):
        """Checks if cells in the cluster file are present in the metadata cells"""
        return set(cluster_cells).issubset(set(metadata_cells))

    def prepare_cell_metadata(self):
        """Does an inner join on the cell and cluster files"""
        if self.cell_metadata is not None:
            self.cell_metadata.preprocess()
            self.merge_df(
                self.file[self.coordinates_and_cell_headers], self.cell_metadata.file
            )
            self.determine_coordinates_and_cell_names()

    def bin(self, annotation: Tuple[str, str], scope: str):
        """Creates bins for a given group

        Args:
            annotation: Tuple[str, str]
                The annotation for a single column, e.g.
                ('annotation_name', 'numeric') or ('annotation_name', 'group')

        Returns:
            bin: Tuple[Dict[str, dataframe], Tuple[str, str]]
                The first element contains all the bins for the given
                column/annotation. For group values it looks like
                {'unique_value1': filtered dataframe where rows == unique_value1};
                there can be up to 20 bins for numeric columns. The second
                element is structured exactly like the input annotation.
        """
        bin = {}
        # Sample the annotation along with coordinates and cell names
        columns_to_sample = copy.copy(self.coordinates_and_cell_headers)
        if scope == "cluster":
            columns_to_sample.append(annotation[0])
        if "group" in annotation:
            # Get unique values in the column
            unique_values = self.file[annotation].unique()
            for col_val in unique_values:
                # Get the subset of data where the row equals the unique value
                subset = self.file[self.file[annotation] == col_val]
                bin[col_val] = subset[columns_to_sample]
        else:
            columns = copy.copy(self.coordinates_and_cell_headers)
            # Coordinates, cell names, and the annotation name
            columns.append(annotation[0])
            # Subset of df where header is [cell_names, x, y, z, <annot_name>]
            subset = self.file[columns].copy()
            subset.sort_values(by=[annotation], inplace=True)
            # Generate 20 bins
            for index, df in enumerate(np.array_split(subset, 20)):
                bin[str(index)] = df[columns_to_sample]
        return bin, annotation

    def subsample(self, scope):
        """Subsamples groups across a given file"""
        sample_sizes = [
            sample_size
            for sample_size in self.SUBSAMPLE_THRESHOLDS
            if sample_size < len(self.file.index)
        ]
        for bins in [self.bin(col, scope) for col in self.annot_column_headers]:
            amount_of_bins = len(bins[0].keys())
            # Name of the current column
            annotation_name = bins[1]
            # Holds bins for the annotation, e.g.
            # {"Unique value #1": dataframe, "Unique value #2": dataframe, ...}
            annotation_dict = bins[0]
            for sample_size in sample_sizes:
                group_size = len(annotation_dict.keys())
                # Dict of values for the x, y, and z coordinates
                points = {k: [] for k in self.coordinates_and_cell_headers}
                if scope == "cluster":
                    points[annotation_name[0]] = []
                num_per_group = int(sample_size / group_size)
                cells_left = sample_size
                # bin = ("unique value in column", dataframe)
                for idx, bin in enumerate(
                    self.return_sorted_bin(annotation_dict, annotation_name)
                ):
                    amount_of_rows = len(bin[1].index)
                    # If the requested sample is larger than the whole array,
                    # take the whole array
                    if num_per_group > amount_of_rows:
                        amount_picked_rows = amount_of_rows
                    else:
                        amount_picked_rows = num_per_group
                    shuffled_df = (
                        bin[1]
                        .reindex(np.random.permutation(bin[1].index))
                        .sample(n=amount_picked_rows)
                    )
                    for column in shuffled_df:
                        points[column[0]].extend(shuffled_df[column].values.tolist())
                    # Add the current observed annotation to the points dict
                    # the amount of times it has been sampled
                    # points[annotation_name] = [bin[0] for i in range(amount_picked_rows)]
                    # Subtract the number of cells subsampled from the number
                    # of cells left
                    cells_left -= amount_picked_rows
                    # For the last bin, sample the number of cells left over
                    # (subtract 2 because the index is 0-based)
                    if idx == (amount_of_bins - 2):
                        num_per_group = cells_left
                    else:
                        group_size -= 1
                        if group_size > 1:
                            num_per_group = int(cells_left / group_size)
                # Yields a tuple: (subsampled values as a dictionary,
                # annotation name, sample size)
                yield (points, annotation_name, sample_size)

    def return_sorted_bin(self, bin, annot_name):
        """Sorts binned groups in order of size, smallest to largest, for
        group annotations"""
        if "group" in annot_name:
            return sorted(bin.items(), key=lambda x: len(x[1]))
        else:
            return bin.items()

    def set_data_array(self, args, kwargs):
        return Clusters.set_data_array(*args, **kwargs)
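# Hedged usage sketch for SubSample above; the file paths are hypothetical.
# subsample() is a generator that yields one (points, annotation_name,
# sample_size) tuple per annotation column and per threshold smaller than
# the number of rows in the cluster file.
subsampler = SubSample('cluster.tsv', cell_metadata_file='metadata.tsv')
subsampler.prepare_cell_metadata()  # inner join with the metadata file
for points, annotation_name, sample_size in subsampler.subsample('cluster'):
    print(annotation_name, sample_size)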
class Sequence:
    def __init__(self, image_dir, gt_path, seqname=None, trackers=[]):
        if seqname is None:
            # Assumed data format is /path/to/data/seqname.{csv|txt}
            self.seqname = gt_path.split('/')[-1][:-4]
        else:
            self.seqname = seqname
        self.gt_annotations = Annotations(gt_path, seqname=seqname)
        self.tracker_res = {}
        for i in trackers:
            try:
                self.tracker_res[i.name] = i.get_res_of(seqname)
            except Exception:
                print(self.seqname, 'not available for', i.name)
        self.img_dir = image_dir
        self.images = [
            i for i in os.listdir(image_dir)
            if i[-4:] == '.png' or i[-4:] == '.jpg'
        ]
        self.images.sort()
        height, width, layers = cv2.imread(
            os.path.join(image_dir, self.images[0])).shape
        self.height = height
        self.width = width
        self.size = (width, height)
        self.obj_size = self.gt_annotations.obj_size

    def get_frame(self, frame_num, scale_bb=1, sort_order='x', off_x=0, off_y=0,
                  show_boxes=True, boxColor=0, show_text=True, textColor=0,
                  save_path=None):
        img_path = os.path.join(self.img_dir, self.images[frame_num])
        try:
            with Image.open(img_path) as image:
                if image.mode != 'L':
                    image = image.convert(mode='L')
                draw = ImageDraw.Draw(image, 'L')
                dets = self.gt_annotations.get_frame(frame_num + 1)[:, 1:].astype(float)
                if len(dets) == 0:
                    if save_path is not None:
                        image.save(save_path)
                    return image
                # Format: [frame, ID, x1, y1, width, height, obj_class,
                #          species, occluded, noisy_frame]
                if sort_order == 'x+y':
                    d = dets[np.argsort(dets[:, 2] + dets[:, 1])]
                else:
                    d = dets[np.lexsort((dets[:, 2], dets[:, 1]))]
                ids = d[:, 0]
                frame_text = "Frame: {0}\nids on screen: \n{1}\n count = {2}".format(
                    frame_num, ids, len(ids))
                if d.shape[1] > 8 and d.shape[0] > 0 and d[0, 8] == 1:
                    # Noisy frame
                    frame_text = "Noisy Frame\n" + frame_text
                draw.text((0, 0), frame_text, (textColor))

                # Custom offset
                dets[:, 1] += off_x
                dets[:, 2] += off_y
                # Scale the boxes
                dets[:, 1] -= dets[:, 3] * ((scale_bb - 1) / 2)
                dets[:, 2] -= dets[:, 4] * ((scale_bb - 1) / 2)
                dets[:, 3:5] = dets[:, 3:5] * scale_bb
                # Convert from [x1, y1, width, height] to [x1, y1, x2, y2]
                dets[:, 3:5] += dets[:, 1:3]

                species = {
                    '-1': "Unknown",
                    '0': "Human",
                    '1': "Elephant",
                    '2': "Lion",
                    '3': "Giraffe",
                    '4': "Dog",
                }
                for i, d in enumerate(dets):
                    if show_boxes:
                        boxcolor = boxColor
                        if d.shape[0] > 7 and d[7] == 1:
                            boxcolor = 255 - boxColor
                        draw.rectangle([d[3], d[4], d[1], d[2]], outline=(boxcolor))
                    d = d.astype(np.int32)
                    if show_text:
                        boxTag = str(d[0])
                        if len(d) > 6 and d[5] == 0:  # animal
                            boxTag += '-' + species[str(d[6])]
                        draw.text((d[1], d[2] - 10), boxTag, (textColor))
                if save_path is not None:
                    image.save(save_path)
                return image
        except Exception as e:
            print("error in redrawing image")
            raise e

    def get_frame_w_trackers(self, frame_num, scale_bb=1, sort_order='x', off_x=0,
                             off_y=0, show_boxes=True, boxColor=0, show_text=True,
                             textColor=0, save_path=None):
        img_path = os.path.join(self.img_dir, self.images[frame_num])
        det_matrix = {
            'GT': self.gt_annotations.get_frame(frame_num)[:, 1:].astype(float)
        }
        colors = {'GT': [textColor, boxColor]}
        index = 1
        for i in self.tracker_res:
            det_matrix[i] = self.tracker_res[i].get_frame(frame_num)[:, 1:].astype(float)
            colors[i] = [COLOR[index], COLOR[index]]
            index += 1
        try:
            with Image.open(img_path) as image:
                if image.mode != 'RGB':
                    image = image.convert(mode='RGB')
                draw = ImageDraw.Draw(image)
                frame_text = "Frame: {0}".format(frame_num)
                d = det_matrix['GT']
                if d.shape[1] > 8 and d.shape[0] > 0 and d[0, 8] == 1:
                    # Noisy frame
                    frame_text += " (Noisy Frame)"
                draw.text((0, 0), frame_text, (textColor))
                text_start = draw.textsize(frame_text)[1] + 1
                for e in det_matrix:
                    dets = det_matrix[e]
                    textColor = colors[e][0]
                    boxColor = colors[e][1]
                    if len(dets) == 0:
                        if save_path is not None:
                            image.save(save_path)
                        continue
                    # Format: [frame, ID, x1, y1, width, height, obj_class,
                    #          species, occluded, noisy_frame]
                    if sort_order == 'x+y':
                        d = dets[np.argsort(dets[:, 2] + dets[:, 1])]
                    else:
                        d = dets[np.lexsort((dets[:, 2], dets[:, 1]))]
                    ids = d[:, 0]
                    frame_text = "{0}: {1} count = {2}".format(e, ids, len(ids))
                    draw.text((0, text_start), frame_text, (textColor))
                    text_start += draw.textsize(frame_text)[1] + 1
                    # Custom offset
                    dets[:, 1] += off_x
                    dets[:, 2] += off_y
                    # Scale the boxes
                    dets[:, 1] -= dets[:, 3] * ((scale_bb - 1) / 2)
                    dets[:, 2] -= dets[:, 4] * ((scale_bb - 1) / 2)
                    dets[:, 3:5] = dets[:, 3:5] * scale_bb
                    # Convert from [x1, y1, width, height] to [x1, y1, x2, y2]
                    dets[:, 3:5] += dets[:, 1:3]
                    species = {
                        '-1': "Unknown",
                        '0': "Human",
                        '1': "Elephant",
                        '2': "Lion",
                        '3': "Giraffe",
                        '4': "Dog",
                    }
                    for i, d in enumerate(dets):
                        if show_boxes:
                            boxcolor = boxColor
                            if d.shape[0] > 7 and d[7] == 1:
                                boxcolor = 255 - boxColor
                            draw.rectangle([d[3], d[4], d[1], d[2]], outline=(boxcolor))
                        d = d.astype(np.int32)
                        if show_text:
                            boxTag = str(d[0])
                            if len(d) > 6 and d[5] == 0:  # animal
                                boxTag += '-' + species[str(d[6])]
                            draw.text((d[1], d[2] - 10), boxTag, (textColor))
                if save_path is not None:
                    image.save(save_path)
                return image
        except Exception as e:
            print("error in redrawing image")
            raise e

    def generate_video(self, output_dir, fps=15, start_frame=0, end_frame=None):
        if end_frame is None:
            end_frame = len(self.images)
        if (start_frame > len(self.images) or end_frame < start_frame
                or end_frame > len(self.images)):
            print("Invalid input", start_frame, end_frame,
                  "number of images is", len(self.images))
            return
        outpath = os.path.join(output_dir, self.seqname + '.avi')
        out = cv2.VideoWriter(outpath, cv2.VideoWriter_fourcc(*"XVID"),
                              float(fps), self.size)
        for i in range(start_frame, end_frame):
            img_path = './output-imgs/' + self.images[i]
            self.get_frame(i, save_path=img_path)
            out.write(cv2.imread(img_path))
        out.release()

    def generate_video_w_trackers(self, output_dir, fps=15, start_frame=0, end_frame=None):
        if end_frame is None:
            end_frame = len(self.images)
        if (start_frame > len(self.images) or end_frame < start_frame
                or end_frame > len(self.images)):
            print("Invalid input", start_frame, end_frame,
                  "number of images is", len(self.images))
            return
        outpath = os.path.join(output_dir, self.seqname + '.avi')
        out = cv2.VideoWriter(outpath, cv2.VideoWriter_fourcc(*"XVID"),
                              float(fps), self.size)
        for i in range(start_frame, end_frame):
            img_path = './output-imgs/' + self.images[i]
            self.get_frame_w_trackers(i, save_path=img_path)
            out.write(cv2.imread(img_path))
        out.release()
        print('Written video to', outpath)
output_file = args[4]
output_json = {}

with open(json_dir + "/info.json", 'r') as f:
    info_json = json.load(f)
output_json.update(info_json)

with open(json_dir + "/licenses.json", 'r') as f:
    licenses_json = json.load(f)
output_json.update(licenses_json)

with open(json_dir + "/categories.json", 'r') as f:
    categories_json = json.load(f)
output_json.update(categories_json)

images = ArcImages(images_dir)
images_obj = images.get_obj()
# Round-trip through JSON so the update uses plain dicts
images_str = json.dumps(images_obj)
images_json = json.loads(images_str)
output_json.update(images_json)

annos = Annotations(bboxes_dir)
annos_obj = annos.get_obj()
expand_json(images_obj, annos_obj)
annos_str = json.dumps(annos.get_json())
annos_json = json.loads(annos_str)
output_json.update(annos_json)

with open(output_file, 'w') as f:
    json.dump(output_json, f)
def mark_added(self, annotations_file, version, command):
    annotations = Annotations(annotations_file)
    annotations.mark_added(version, command)
def show_missing(self, cli, annotations_file):
    commands = HelpParser().parse_help_overview(CliCaller(cli).help()).flat()
    annotations = Annotations(annotations_file)
    annotations.show_missing(commands)