def test_coerce_numeric_values(self):
        cm = Annotations(
            "../tests/data/metadata_example.txt",
            ["text/csv", "text/plain", "text/tab-separated-values"],
        )
        cm.create_data_frame()
        cm.file = Annotations.coerce_numeric_values(cm.file, cm.annot_types)
        dtype = cm.file.dtypes[("Average Intensity", "numeric")]
        self.assertEqual(dtype, np.float64)

        # Test that numeric values were rounded to three or fewer decimal places
        # Pick a random number between 1 and the number of lines in the file
        ran_num = random.randint(1, 20)
        for column in cm.file.columns:
            annot_type = column[1]
            if annot_type == "numeric":
                value = str(cm.file[column][ran_num])
                print(Decimal(value).as_tuple().exponent)
                assert (
                    abs(Decimal(value).as_tuple().exponent) <= self.EXPONENT
                ), "Numbers did not round to 3 or fewer decimal places"

        # Test for string in numeric column
        cm_has_bad_value = Annotations(
            "../tests/data/metadata_bad_contains_str_in_numeric_column.txt",
            ["text/csv", "text/plain", "text/tab-separated-values"],
        )
        cm_has_bad_value.create_data_frame()
        self.assertRaises(
            ValueError,
            Annotations.coerce_numeric_values,
            cm_has_bad_value.file,
            cm_has_bad_value.annot_types,
        )
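
# A minimal illustration (not part of the test suite) of the Decimal check used
# above: as_tuple().exponent is the negative of the number of digits after the
# decimal point, so abs(exponent) <= 3 means three or fewer decimal places.
from decimal import Decimal

print(Decimal("1.2").as_tuple().exponent)    # -1: one decimal place
print(Decimal("1.234").as_tuple().exponent)  # -3: three decimal places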
Example #2
def apply(binary_path: str, sig_path: str) -> Tuple[int, str]:
    """
    Applies signatures in specified file to specified binary, and writes resulting bndb to disk

    :param binary_path: path of binary to apply signatures to
    :param sig_path: path of signature file to read in
    :return: tuple (int count of function signatures matched, str path to BNDB with tags that was created)
    """
    bv = binja.BinaryViewType.get_view_of_file(binary_path)
    print("Loaded binary {} into Binary Ninja.".format(binary_path))
    functions = hash_all(bv)
    print("{} functions in binary have been hashed.".format(len(functions)))
    data = read_json(sig_path)
    signatures = {}
    for raw_hash in data:
        # only bother with functions that actually have tags
        if len(data[raw_hash]) > 0:
            signatures[raw_hash] = Annotations(raw_data=data[raw_hash])

    print("Signature file {} loaded into memory.".format(sig_path))

    num_func_sigs_applied = 0
    for function_hash in functions:
        if function_hash in signatures:
            tag_function(bv, functions[function_hash], function_hash, signatures)
            print('Located a match at {}!'.format(function_hash))
            num_func_sigs_applied += 1

    output_bndb = os.path.join(os.getcwd(), binary_path + '.bndb')
    print("Writing output Binary Ninja database at {}".format(output_bndb))
    bv.create_database(output_bndb)
    return num_func_sigs_applied, output_bndb
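
# Hedged usage sketch (paths are hypothetical, not taken from the example):
# apply() returns the match count and the path of the tagged database.
if __name__ == '__main__':
    count, db_path = apply('/tmp/target.bin', '/tmp/target_sigs.json')
    print('{} signatures applied; database at {}'.format(count, db_path))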
Example #3
File: __main__.py Project: ning-y/trcls
def main():
    r"""
    Runs trcls.
    """
    parser = cli.get_parser()
    args = parser.parse_args()
    logger = setup_logging(args)

    if args.version:
        print('trcls {}'.format(VERSION))
        exit(0)

    if args.alignment is None or args.features is None:
        logger.error(
            'Both SAM alignment and GTF annotation files must be provided')
        parser.print_help()
        exit(1)

    with open(args.features) as features_file:
        annotations = Annotations(features_file)

    with open(args.alignment) as alignment_file:
        alignments = alignment_file.readlines()

    headers = filter(lambda l: l.startswith('@'), alignments)
    headers = map(str.strip, headers)
    alignments = filter(lambda l: not l.startswith('@'), alignments)

    transcripts = get_transcripts(alignments, args.skip_tolerance,
                                  args.map_tolerance)

    print('\n'.join(headers))
    for transcript in transcripts:
        transcript.annotate(annotations, args.junction_tolerance)
        print(transcript)
    def test_validate_numeric_annots(self):
        cluster = Annotations(
            "../tests/data/cluster_bad_missing_coordinate.txt",
            TestAnnotations.ALLOWED_FILE_TYPES,
        )
        cluster.create_data_frame()
        self.assertTrue(cluster.validate_numeric_annots())
    def test_merge_df(self):
        cluster = Clusters(
            "../tests/data/test_1k_cluster_data.csv",
            "dec0dedfeed1111111111111",
            "addedfeed000000000000000",
            "testCluster",
        )
        cell_metadata_df = Annotations(
            self.CELL_METADATA_PATH,
            ["text/csv", "text/plain", "text/tab-separated-values"],
        )
        cell_metadata_df.preprocess()
        cell_names_cell_metadata_df = np.asarray(cell_metadata_df.file["NAME"])
        cell_names_cluster_df = np.asarray(cluster.file["NAME"])
        # Cell names found in both cluster and metadata files
        common_cell_names = cell_names_cluster_df[
            np.isin(cell_names_cluster_df, cell_names_cell_metadata_df)
        ]
        print(f"common cell names: {common_cell_names}")
        # Perform merge
        print(cluster.file[["NAME", "x", "y", "z"]])
        cluster.merge_df(cluster.file[["NAME", "x", "y", "z"]], cell_metadata_df.file)

        # Ensure ONLY common cell names found in cell metadata file and cluster file
        # are in the newly merged df
        result = all(
            cell[0] in common_cell_names for cell in cluster.file["NAME"].values
        )
        self.assertTrue(
            result,
            "Merge was not performed correctly. Merge should be performed on 'NAME'",
        )
    def test_low_mem_artifact(self):
        # pandas' default of low_memory=True allows internal chunking during parsing,
        # causing inconsistent dtype coercion artifacts for larger annotation files

        lmtest = Annotations(
            "../tests/data/low_mem_unit.txt",
            ["text/csv", "text/plain", "text/tab-separated-values"],
        )
        lmtest.preprocess()

        # when low_memory=True, the first row in the file would be in the first chunk
        # and the numeric value was not properly coerced to a string
        assert isinstance(
            lmtest.file["mixed_data"]["group"][0], str
        ), "numeric value should be coerced to string"

        # Per SCP-2545 NA values become strings for group annotations.
        print(lmtest.file["mixed_data"]["group"][2])
        print(type(lmtest.file["mixed_data"]["group"][2]))
        assert isinstance(
            lmtest.file["mixed_data"]["group"][2], str
        ), "expect empty cell conversion to NaN is string for group annotation"

        # numeric value in second chunk should still properly be coerced to string type
        assert isinstance(
            lmtest.file["mixed_data"]["group"][32800], str
        ), "numeric value should be coerced to string"
Example #7
    def __init__(self, image_dir, gt_path, seqname=None, trackers=None):
        if seqname is None:
            # Assumed data format is /path/to/data/seqname.{csv|txt}
            self.seqname = gt_path.split('/')[-1][:-4]
        else:
            self.seqname = seqname

        self.gt_annotations = Annotations(gt_path, seqname=seqname)
        self.tracker_res = {}
        for i in trackers or []:
            try:
                self.tracker_res[i.name] = i.get_res_of(seqname)
            except Exception:
                print(self.seqname, 'not available for', i.name)
        self.img_dir = image_dir
        self.images = [
            i for i in os.listdir(image_dir)
            if i.endswith('.png') or i.endswith('.jpg')
        ]
        self.images.sort()
        height, width, layers = cv2.imread(
            os.path.join(image_dir, self.images[0])).shape
        self.height = height
        self.width = width
        self.size = (width, height)
        self.obj_size = self.gt_annotations.obj_size
    def __init__(self, cluster_file, cell_metadata_file=None):
        Annotations.__init__(self, cluster_file, self.ALLOWED_FILE_TYPES)
        self.preprocess()
        self.determine_coordinates_and_cell_names()
        if cell_metadata_file is not None:
            self.cell_metadata = Annotations(cell_metadata_file,
                                             CellMetadata.ALLOWED_FILE_TYPES)
    def test_leading_zeros(self):
        """Ensures leading zeros are not stripped from group annotations"""
        path = "../tests/data/metadata_convention_with_leading_0s.tsv"
        annotation = Annotations(
            path, ["text/csv", "text/plain", "text/tab-separated-values"]
        )
        annotation.preprocess()
        # Grab value from donor id column.
        value_with_leading_zeros = annotation.file.iloc[
            :, annotation.file.columns.get_level_values(0) == "donor_id"
        ].values.item(0)
        self.assertTrue(value_with_leading_zeros.startswith("0"))
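
# Sketch of the underlying pandas behavior (illustrative values): without an
# explicit dtype, read_csv infers integers and strips leading zeros, while
# dtype=str preserves them.
import io
import pandas as pd

raw = "donor_id\n000123\n"
print(pd.read_csv(io.StringIO(raw))["donor_id"][0])             # 123
print(pd.read_csv(io.StringIO(raw), dtype=str)["donor_id"][0])  # '000123'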
    def test_duplicate_headers(self):
        """Annotation headers should not contain duplicate values
        """
        dup_headers = Annotations(
            "../tests/data/dup_headers_v2.0.0.tsv",
            ["text/csv", "text/plain", "text/tab-separated-values"],
        )

        self.assertFalse(
            dup_headers.validate_unique_header(),
            "Duplicate headers should fail format validation",
        )

        with self.assertRaises(ValueError):
            dup_headers.preprocess()
Example #11
def read_tags(bv: Binary_View,
              hashes: Dict[str, Function]) -> Dict[str, Annotations]:
    """
    Gathers tag locations from every function in the binary.

    :param bv: BinaryView that contains the analysis results
    :param hashes: a dictionary mapping hashes to their functions
    :return: dictionary representing all tags in the current binary
    """
    tagged_dict = {}

    # TODO: switch to use GetAllTagReferences once it's available in the python API for O(1) access times
    for hash_value in hashes:
        function = hashes[hash_value]
        tagged_dict[hash_value] = Annotations(function=function, bv=bv)
    return tagged_dict
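
# Hedged usage sketch (hash_all comes from the surrounding project; the
# binary path is hypothetical): pair read_tags with hash_all to snapshot
# every tagged function in a binary.
# bv = binja.BinaryViewType.get_view_of_file('/tmp/tagged.bndb')
# tags = read_tags(bv, hash_all(bv))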
    def test_convert_header_to_multiIndex(self):
        expected = [
            ("Name", "TYPE"),
            ("X", "numeric"),
            ("Y", "numeric"),
            ("Z", "numeric"),
            ("Average Intensity", "numeric"),
        ]
        path = "../tests/data/good_subsample_cluster.csv"
        annotation = Annotations(
            path, ["text/csv", "text/plain", "text/tab-separated-values"]
        )
        df = annotation.open_file(
            path, open_as="dataframe", skiprows=2, names=annotation.headers
        )[0]
        new_df = Annotations.convert_header_to_multi_index(df, expected)
        # Remove white spaces
        new_df_columns = [tuple(s.strip() for s in y) for y in new_df.columns]
        self.assertEqual(new_df_columns, expected)
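
# Illustration of the MultiIndex columns the helper produces, built here
# directly with the standard pandas API (illustrative values):
import pandas as pd

columns = pd.MultiIndex.from_tuples([("Name", "TYPE"), ("X", "numeric")])
df = pd.DataFrame([["CELL_0001", 34.4]], columns=columns)
print(df[("X", "numeric")])  # columns are addressed by (header, annotation type)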
Example #13
    def tosling(self, filename):
        documents = []
        annotations = Annotations(self)
        input_stats = self.summary.input

        # Callback that will be invoked for each SLING document that is built.
        # This could be for each sentence or each document part, as specified.
        def callback(document):
            documents.append(document)

        with open(filename, "r") as f:
            input_stats.files.increment()
            lines = f.readlines()
            for line in lines:
                annotations.read(line, callback)

        for document in documents:
            self._add_output_statistics(document)

        return documents
    def test_header_format(self):
        """Header rows of metadata file should conform to standard
        """
        error_headers = Annotations(
            "../tests/data/error_headers_v2.0.0.tsv",
            ["text/csv", "text/plain", "text/tab-separated-values"],
        )

        self.assertFalse(
            error_headers.validate_header_keyword(),
            "Missing NAME keyword should fail format validation",
        )

        self.assertFalse(
            error_headers.validate_type_keyword(),
            "Missing TYPE keyword should fail format validation",
        )

        self.assertFalse(
            error_headers.validate_type_annotations(),
            "Invalid type annotations should fail format validation",
        )
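
# For context, the two-row header convention these validators check, as
# inferred from the NAME/TYPE assertions above (illustrative content):
#
#   NAME    biosample_id    disease
#   TYPE    group           group
#   CELL_1  sample_1        normal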
Example #15
    def __init__(self, output_dir):
        self.output_dir = Path(output_dir)
        self.annotations = Annotations("annotations-bitcoin-0.18.json")
Example #16
    def clean_annotations(self, annotations_file):
        annotations = Annotations(annotations_file)
        annotations.clean_annotations()
Example #17
    def import_see_also(self, markdown_dir, annotations_file):
        annotations = Annotations(annotations_file)
        annotations.import_see_also(markdown_dir)
    def setUp(self):
        self.df = Annotations(
            self.CLUSTER_PATH, ["text/csv", "text/plain", "text/tab-separated-values"]
        )
Example #19
def flna_annotations():
    with open('test/FLNA.gtf') as gtf_file:
        return Annotations(gtf_file)
Example #20
    output_file = args[4]

    output_json = {}
    with open(json_dir + "/info.json", 'r') as f:
        info_json = json.load(f)
        output_json.update(info_json)

    with open(json_dir + "/licenses.json", 'r') as f:
        licencses_json = json.load(f)
        output_json.update(licencses_json)

    with open(json_dir + "/categories.json", 'r') as f:
        categories_json = json.load(f)
        output_json.update(categories_json)

    images = ArcImages(images_dir)
    images_obj = images.get_obj()
    images_str = json.dumps(images_obj)
    images_json = json.loads(images_str)
    output_json.update(images_json)

    annos = Annotations(bboxes_dir)
    annos_obj = annos.get_obj()
    expand_json(images_obj, annos_obj)
    annos_str = json.dumps(annos.get_json())
    annos_json = json.loads(annos_str)
    output_json.update(annos_json)

    with open(output_file, 'w') as f:
        json.dump(output_json, f)
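
# Sketch of the merged output's top-level shape, inferred from the input
# file names above (COCO-style; illustrative values only):
expected_shape = {
    "info": {},         # from info.json
    "licenses": [],     # from licenses.json
    "categories": [],   # from categories.json
    "images": [],       # from ArcImages(images_dir).get_obj()
    "annotations": [],  # from Annotations(bboxes_dir).get_json()
}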
Example #21
def parse_and_generate(filename,
                       out_filename=None,
                       init_filename=None,
                       include_paths=None,
                       defines=None):
    """
    Parse the file at filename.
    If out_filename and init_filename are None, return a tuple of the generated
    source code for each; otherwise write the files and return a tuple of the
    generated files' names.
    """
    from os import path

    include_paths = include_paths or []
    defines = defines or []

    if out_filename:
        out_filename = re.sub(FILENAME_EXPR, out_filename, filename)
    if init_filename:
        init_filename = re.sub(FILENAME_EXPR, init_filename, filename)
    rel_filename = ''
    if out_filename is None and init_filename is None:
        rel_filename = re.sub(FILENAME_EXPR, r'\g<basename>.h',
                              path.basename(filename))
    else:
        init_dir = path.dirname(init_filename)
        rel_filename = path.relpath(out_filename, init_dir)

    ast, text = parse_jstruct(filename,
                              include_paths=include_paths,
                              defines=defines)
    annotations = Annotations(text)
    try:
        annotations.expand(ast, '<stdin>')
    except ExpansionError as ex:
        ex.filename = filename
        raise

    prune_ast(ast, '<stdin>')
    out_ast, init_ast = split_ast(ast)

    generator = CGenerator()
    out_result = generator.visit(out_ast)
    init_result = generator.visit(init_ast)
    if GUARD_HEADERS_EXPR.search(out_result):
        out_result = re.sub(
            GUARD_HEADERS_EXPR, r'\g<0>' + GENERATED, out_result,
            count=1) + '\n#endif\n'
    else:
        out_result = GENERATED + out_result
    init_result = re.sub(GUARD_HEADERS_EXPR, '', init_result)
    init_instructions = (INIT_INSTRUCTIONS
                         if init_filename and init_filename.endswith('.h')
                         else '')
    init_result = GENERATED1NL + init_instructions + INCLUDE_H(rel_filename) + init_result

    if out_filename:
        with open(out_filename, 'w') as out_file:
            out_file.write(out_result)
    if init_filename:
        with open(init_filename, 'w') as init_file:
            init_file.write(init_result)

    if out_filename is None and init_filename is None:
        return (out_result, init_result)
    else:
        return (out_filename, init_filename)
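
# Hedged usage sketch (input path hypothetical): with no output names given,
# the generated sources are returned instead of written to disk.
# out_src, init_src = parse_and_generate('point.jstruct.h')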
Example #22
    def show_missing(self, cli, annotations_file):
        commands = HelpParser().parse_help_overview(
            CliCaller(cli).help()).flat()
        annotations = Annotations(annotations_file)
        annotations.show_missing(commands)
Example #23
    def mark_added(self, annotations_file, version, command):
        annotations = Annotations(annotations_file)
        annotations.mark_added(version, command)
Example #24
def generate_lawbook_gatsby(name):
    ANNOTATIONS = ANNOTATIONS_MAP.get(name, Annotations(list()))

    with open(os.path.join(STATIC_DIR, "%s.js" % name), "w+",
              encoding="utf-8") as fp:
        fp.write("""
import React from "react"
import Norm from "../components/norm"
import Abs from "../components/abs"
import Sub from "../components/sub"
import Section from "../components/section"

export default () => (
<div>
""")
        data = read_json(name)
        section_types = []  # how deep we currently are, e.g. ["Buch", "Abschnitt", "Titel"]

        fp.write("<h1>%s</h1>" % name)

        for entry in data:
            if entry["type"] == "section":
                title = entry["title"]
                section_type = title.split(" ")[0]

                idx = find(section_type, section_types)
                if idx == -1:
                    section_types.append(section_type)
                else:
                    fp.write("</Section>" * (len(section_types) - idx))
                    section_types = section_types[:idx + 1]

                fp.write("<Section title={'%s'}>" % title)
            else:
                paragraph, title = entry["norm"], entry.get("title", "")
                if title is None: title = ""

                # print("Writing %s %s" % (paragraph, name))

                fp.write(
                    "<Norm norm={'%s'} title={'%s'} marked={%s}>\n" %
                    (paragraph, title,
                     "true" if ANNOTATIONS.is_marked(paragraph) else "false"))

                for absatz in entry["paragraphs"]:
                    fp.write("<Abs> %s\n" % absatz["text"])

                    subs = absatz["sub"]
                    if subs:
                        for i, sub in enumerate(subs):
                            fp.write("<Sub>%d. %s\n" % (i + 1, sub["text"]))
                            subsubs = sub["sub"]

                            if subsubs:
                                fp.write("<div class='subsubbox'>\n")
                                letters = lit_gen()
                                for subsub in subsubs:
                                    fp.write(
                                        "<div class='subsub'>%s) %s</div>\n" %
                                        (next(letters), subsub["text"]))

                                fp.write("</div>\n")  # .subsubbox

                            fp.write("</Sub>\n")
                    fp.write("</Abs>\n")
                fp.write("</Norm>\n")

        if section_types:
            print(section_types)
            fp.write("</Section>" * (len(section_types)))

        fp.write("</div>)")  # end global div
Example #25
def generate_lawbook(name):
    ANNOTATIONS = ANNOTATIONS_MAP.get(name, Annotations(list()))

    with open(os.path.join(STATIC_DIR, "%s.html" % name),
              "w+",
              encoding="utf-8") as fp:
        fp.write("""<html>
        <head>
          <title> %s </title>
          <meta charset="utf-8">
          <meta name="viewport" content="width=device-width, initial-scale=1">
          <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/css/bootstrap.min.css">
          <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
          <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.4.0/js/bootstrap.min.js"></script>
          <link href="css/gesetze.css" rel="stylesheet" title="Default Style">
        </head>

        <body>
        """ % name)

        data = read_json(name)

        fp.write("<h1>%s</h1>" % name)

        for entry in data:
            if entry["type"] == "section":
                fp.write("<h3>%s</h3>" % entry["title"])

            else:
                paragraph, title = entry["norm"], entry.get("title", "")
                if title is None: title = ""

                # print("Writing %s %s" % (paragraph, name))

                anchor = "<a id='#%s'></a>" % entry["norm"]
                fp.write("<div class='norm'>")
                fp.write(
                    "<div class='normhead%s'>%s %s</div> %s" %
                    (" marked" if ANNOTATIONS.is_marked(paragraph) else "",
                     paragraph, title, anchor))
                fp.write("<div class='normtext'>")

                for absatz in entry["paragraphs"]:
                    fp.write("<div class='abs'>%s" % (absatz["text"]))

                    subs = absatz["sub"]
                    if subs:
                        fp.write("<div class='subbox'>")
                        for i, sub in enumerate(subs):
                            fp.write("<div class='sub'>%d. %s" %
                                     (i + 1, sub["text"]))
                            subsubs = sub["sub"]

                            if subsubs:
                                fp.write("<div class='subsubbox'>")
                                letters = lit_gen()
                                for subsub in subsubs:
                                    fp.write(
                                        "<div class='subsub'>%s) %s</div>" %
                                        (next(letters), subsub["text"]))

                                fp.write("</div>")  # .subsubbox

                            fp.write("</div>")  # .sub
                        fp.write("</div>")  # .subbox
                    fp.write("</div>")  # .abs
                fp.write("</div>")  # .normtext
                fp.write("</div>")  # .norm

        fp.write("</body> </html>")