コード例 #1
0
def finalize_third_parties(evidence, third_party_analysis, sdks):
    """
    Mark third party practices as USED once the app's plist backs them up.

    Every sdk project is checked against every practice that occurs in the
    third party analysis. A ``Used.USED`` / ``Classification.THIRDPARTY``
    entry is appended for the project when the first party evidence holds a
    plist entry for the practice and either the third party search succeeds
    for the project, or the "no authorization" third party search succeeds
    and the authorization is found in the first party evidence.

    :param evidence: evidence list of first party analysis
    :param third_party_analysis: evidence list of third party analysis
            (extended in place)
    :param sdks: sdk projects to check
    :return third_party_analysis: the same list, including the USED entries
    """
    # distinct practices, kept in order of first appearance
    seen_practices = list(
        dict.fromkeys(entry.practice for entry in third_party_analysis))

    for sdk in sdks:
        for practice in seen_practices:
            # nothing counts as used without a plist entry
            if not plist_search(evidence, practice):
                continue

            # either the sdk is complete on its own, or it lacks only the
            # authorization and the first party supplies it
            if thirdp_search(third_party_analysis, practice, sdk) or (
                    thirdp_noauth_search(third_party_analysis, practice, sdk)
                    and authorize_search(evidence, practice, True)):
                third_party_analysis.append(
                    Evidence(sdk, practice, Used.USED,
                             Classification.THIRDPARTY))

    return third_party_analysis
コード例 #2
0
def check_classifications(evidence, third_party_analysis, first_party,
                          file_name):
    """
    Derive first party usage evidence from the combined evidence lists.

    The searches run over ``evidence + third_party_analysis``; results are
    appended to ``evidence`` only. Nothing is added unless ``first_party`` is
    true (the plist is checked later, elsewhere).

    For practices other than IDFA, Facebook and Google an UNUSED /
    FIRSTPARTY entry is added when import, authorization and constructor are
    all found in first party code, or when import and constructor are found
    and the authorization is found by the third party authorization search.
    IDFA, Facebook and Google get a USED / THIRDPARTY entry when import and
    constructor are found together with a method or an authorization.

    :param evidence: list of evidence (extended in place)
    :param third_party_analysis: third party evidence, searched but not
            modified
    :param first_party: Boolean that is true if the file being analyzed is
            first party
    :param file_name: name of file to attach to new evidence
    :return evidence: list with evidence
    """
    combined = evidence + third_party_analysis
    practices = list(dict.fromkeys(entry.practice for entry in combined))

    # only first party files produce new evidence
    if not first_party:
        return evidence

    for practice in practices:
        # all three pieces in first party code, or the authorization lives
        # in a third party while import and constructor are first party
        first_party_usage = (
            import_search(combined, practice, True)
            and authorize_search(combined, practice, True)
            and constructor_search(combined, practice, True)
        ) or (
            import_search(combined, practice, True)
            and constructor_search(combined, practice, True)
            and third_party_auth_search(combined, practice)
        )
        if first_party_usage and practice not in (
                Practices.IDFA, Practices.FACEBOOK, Practices.GOOGLE):
            evidence.append(
                Evidence(file_name, practice, Used.UNUSED,
                         Classification.FIRSTPARTY))

        # IDFA, Facebook and Google log in are reported as third party usage
        login_or_idfa_usage = (
            import_search(combined, practice, True)
            and (method_search(combined, practice, False)
                 or authorize_search(combined, practice, True))
            and constructor_search(combined, practice, True)
        )
        if login_or_idfa_usage and practice in (
                Practices.IDFA, Practices.GOOGLE, Practices.FACEBOOK):
            evidence.append(
                Evidence(file_name, practice, Used.USED,
                         Classification.THIRDPARTY))

    return evidence
コード例 #3
0
def get_canonical_sequence(item, entity='all'):
    """
    Return the canonical sequence of a UniProt entry as an Evidence object.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``sequence`` (with a ``#text`` field) and ``accession``
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the sequence text and whose reference
            is the entry's UniProtKB accession
    """
    entry = item['uniprot']['entry']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    accession = entry['accession']
    if isinstance(accession, list):
        accession = accession[0]

    evidence = Evidence()
    evidence.value = entry['sequence']['#text']
    evidence.add_reference({'database': 'UniProtKB', 'id': accession})

    return evidence
コード例 #4
0
def get_uniprot_id(item, entity='all'):
    """
    Return the UniProt accession of an entry as an Evidence object.

    :param item: parsed UniProt record holding
            ``item['uniprot']['entry']['accession']`` as a list or a single
            value
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the (first) accession, referenced to
            UniProtKB under the same id
    """
    evidence = Evidence()

    # a list of accessions contributes its first element
    accession = item['uniprot']['entry']['accession']
    if type(accession) is list:
        accession = accession[0]

    evidence.value = accession
    evidence.add_reference({'database': 'UniProtKB', 'id': accession})

    return evidence
コード例 #5
0
def get_uniprot_entry_name(item, entity='all'):
    """
    Return the entry name of a UniProt record as an Evidence object.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``name`` and ``accession``
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the entry name, referenced to the
            entry's UniProtKB accession
    """
    entry = item['uniprot']['entry']

    evidence = Evidence()
    evidence.value = entry['name']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    accession = entry['accession']
    if isinstance(accession, list):
        accession = accession[0]
    evidence.add_reference({'database': 'UniProtKB', 'id': accession})

    return evidence
コード例 #6
0
def check_plist(plist_directory, plist_content, first_party_info):
    """
    Checks an app's plist for used privacy practices.

    An app must specify a permission in its plist file to use it.
    This function searches for permission keys. The usage description is
    taken from the ``<string>`` element on the line after the key; when the
    key is on the last line or the next line holds no such element, the
    evidence is still recorded, with an empty description.

    :param plist_directory: the directory of the plist file
    :param plist_content: text content of the plist
    :param first_party_info: the dictionary of what first party code to look
            for for every privacy practice
    :return evidence: Evidence objects list with all privacy practices
            found in a plist file
    """
    specification_regex = re.compile(r"\s<string>(.*)<\/string>")

    evidence = []
    plist_lines = plist_content.splitlines()
    for index, line in enumerate(plist_lines):
        for (practice, data) in iteritems(first_party_info):
            for plist_key in data["SWIFT"]["PLIST"]:
                if "<key>" + plist_key + "</key>" not in line:
                    continue

                # The description is optional: guard against a key on the
                # final line and against a following line without <string>.
                specification = ""
                if index + 1 < len(plist_lines):
                    match = specification_regex.search(plist_lines[index + 1])
                    if match is not None:
                        specification = match.group(1)

                evidence.append(
                    Evidence(plist_directory,
                             practice,
                             Used.UNUSED,
                             Classification.PLIST,
                             reason=specification))
    return evidence
コード例 #7
0
def get_organism_scientific_name(item, entity='all'):
    """
    Return the scientific name of the entry's organism as an Evidence object.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``accession`` and may hold ``organism``
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the scientific name, referenced to NCBI
            Taxonomy and UniProtKB; None when the entry has no organism or
            no name of type 'scientific'
    :raises ValueError: when the organism's dbReference is not of type
            'NCBI Taxonomy'
    """
    entry = item['uniprot']['entry']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    accession = entry['accession']
    if isinstance(accession, list):
        accession = accession[0]

    if 'organism' not in entry:
        return None

    organism = entry['organism']

    # An organism with one name holds a single mapping rather than a list.
    names = organism['name']
    if not isinstance(names, list):
        names = [names]

    output = None
    for name in names:
        if name['@type'] != 'scientific':
            continue

        evidence = Evidence()
        evidence.value = name['#text']

        dbref_type = organism['dbReference']['@type']
        dbref_id = organism['dbReference']['@id']

        if dbref_type != 'NCBI Taxonomy':
            raise ValueError('Unknown reference in database')
        evidence.add_reference({'database': 'NCBI_Taxonomy', 'id': dbref_id})
        evidence.add_reference({'database': 'UniProtKB', 'id': accession})

        # when several scientific names exist the last one is returned
        output = evidence

    return output
コード例 #8
0
def generate_evidence():
    """
    Load the perfume measurements as a flat list of Evidence objects.

    Reads ``datasets/perfume_data.csv`` (semicolon separated). The first
    column of every row is skipped; every remaining cell is parsed as a
    number written with a decimal comma.

    :return: list with one Evidence per measurement, in file order
    """
    evidence = []

    # csv.reader needs a text-mode file on Python 3 ('rb' yields bytes and
    # fails); newline='' leaves line-ending handling to the csv module.
    with open('datasets/perfume_data.csv', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=';'):
            evidence.extend(
                Evidence(float(measurement.replace(",", ".")))
                for measurement in row[1:])

    return evidence
コード例 #9
0
def analyze_bin_file(first_party_info, text, file_name):
    """
    Scan the text of a binary file for third party keyword matches.

    For every practice the Objective-C classes, authorization methods,
    frameworks and additional evidence strings are looked up in ``text`` by
    plain substring search. An authorization method with an empty parameter
    list counts as soon as its name occurs; otherwise one entry is added for
    each of its parameters that occurs in the text as well.

    :param first_party_info: the dictionary of what first party code to look
            for for every privacy practice
    :param text: text to look through
    :param file_name: name of binary file
    :return evidence: list with evidence
    """
    found = []

    for (practice, practice_info) in iteritems(first_party_info):

        def record(classification, practice=practice):
            # every hit in a binary is an UNUSED third party entry
            found.append(
                Evidence(file_name, practice, Used.UNUSED, classification))

        for class_name in practice_info["OBJECTIVE-C"]["CLASS"]:
            if class_name in text:
                record(Classification.CLASS_THIRD)

        auth_methods = practice_info["OBJECTIVE-C"]["AUTHORIZATION_METHOD"]
        for method_name in auth_methods:
            if method_name not in text:
                continue
            parameters = auth_methods[method_name]
            if parameters == []:
                record(Classification.AUTHORIZATION_THIRD)
                continue
            for parameter in parameters:
                if parameter in text:
                    record(Classification.AUTHORIZATION_THIRD)

        for framework in practice_info["OBJECTIVE-C"]["FRAMEWORK"]:
            if framework in text:
                record(Classification.FRAMEWORK_THIRD)

        for extra in practice_info["OBJECTIVE-C"]["ADDITIONAL_EVIDENCE"]:
            if extra in text:
                record(Classification.METHOD_THIRD)

    return found
コード例 #10
0
def get_name(item, entity='all'):
    """
    Return the recommended full name of a UniProt entry as an Evidence object.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``protein.recommendedName.fullName`` and ``accession``
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the full name, carrying the references
            listed in the name's ``@evidence`` attribute plus the UniProtKB
            accession
    :raises ValueError: when an ``@evidence`` number does not match the
            ``@key`` of the evidence record at that position
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence

    entry = item['uniprot']['entry']
    evidence = Evidence()

    full_name = entry['protein']['recommendedName']['fullName']

    if isinstance(full_name, str):
        evidence.value = full_name
    elif isinstance(full_name, dict):
        # isinstance covers both OrderedDict and plain dict, whichever the
        # XML parser produced
        if '#text' in full_name:
            evidence.value = full_name['#text']
        if '@evidence' in full_name:
            # evidence numbers are 1-based positions in the entry's list
            for evidence_number_in_db in full_name['@evidence'].split():
                evidence_in_db = entry['evidence'][
                    int(evidence_number_in_db) - 1]
                if evidence_in_db['@key'] != evidence_number_in_db:
                    raise ValueError(
                        'Evidence number does not match evidence @key')
                _add_reference_to_evidence(evidence, evidence_in_db)

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    accession = entry['accession']
    if isinstance(accession, list):
        accession = accession[0]
    evidence.add_reference({'database': 'UniProtKB', 'id': accession})

    return evidence
コード例 #11
0
ファイル: get_short_name.py プロジェクト: uibcdf/Sabueso
def get_short_name(item, entity='all'):
    """
    Return the recommended short name of a UniProt entry as an Evidence object.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``protein.recommendedName`` and ``accession``
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the short name, carrying the references
            listed in its ``@evidence`` attribute plus the UniProtKB
            accession; None when the entry has no short name
    :raises ValueError: when an ``@evidence`` number does not match the
            ``@key`` of the evidence record at that position
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence

    entry = item['uniprot']['entry']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    accession = entry['accession']
    if isinstance(accession, list):
        accession = accession[0]

    recommended_name = entry['protein']['recommendedName']
    if 'shortName' not in recommended_name:
        return None

    short_name = recommended_name['shortName']
    evidence = Evidence()

    if isinstance(short_name, str):
        evidence.value = short_name
    else:
        evidence.value = short_name['#text']
        if '@evidence' in short_name:
            # split() without argument tolerates repeated whitespace;
            # evidence numbers are 1-based positions in the entry's list
            for evidence_number_in_db in short_name['@evidence'].split():
                evidence_in_db = entry['evidence'][
                    int(evidence_number_in_db) - 1]
                if evidence_in_db['@key'] != evidence_number_in_db:
                    raise ValueError(
                        'Evidence number does not match evidence @key')
                _add_reference_to_evidence(evidence, evidence_in_db)

    evidence.add_reference({'database': 'UniProtKB', 'id': accession})

    return evidence
コード例 #12
0
def classify_third_parties(evidence, project):
    """
    Summarise, per practice, how far a third party project goes in using it.

    For every practice present in ``evidence`` one UNUSED summary entry may
    be appended for ``project``:

    * THIRDPARTY when import, constructor and authorization are all found,
    * THIRDPARTY_NOAUTH when import and constructor are found but the
      authorization is not (it then has to come from the first party),
    * THIRDPARTY_AUTH when only the authorization is found.

    :param evidence: evidence list (extended in place)
    :param project: sdk project being analyzed
    :return evidence: the same list including the summary entries
    """
    practices = list(dict.fromkeys(entry.practice for entry in evidence))

    for practice in practices:
        has_code = import_search(evidence, practice, False) and \
            constructor_search(evidence, practice, False)
        has_auth = authorize_search(evidence, practice, False)

        if has_code:
            # complete usage, or only the authorization is missing
            if has_auth:
                classification = Classification.THIRDPARTY
            else:
                classification = Classification.THIRDPARTY_NOAUTH
        elif has_auth:
            # an authorization on its own is flagged as well
            classification = Classification.THIRDPARTY_AUTH
        else:
            continue

        evidence.append(
            Evidence(project, practice, Used.UNUSED, classification))

    return evidence
コード例 #13
0
def format_evidence(evidence):
    """
    Append a USED placeholder for every practice that is fully backed.

    Only PLIST, FIRSTPARTY and ENTITLEMENT entries are considered. A practice
    counts as used when it has

    * FIRSTPARTY, PLIST and ENTITLEMENT entries for HOMEKIT and HEALTH,
    * FIRSTPARTY and ENTITLEMENT entries for PURCHASES,
    * FIRSTPARTY and PLIST entries for every other practice.

    :param evidence: evidence of whether any calls are made (extended in
            place)
    :return evidence: the same list with a USED / FIRSTPARTY entry for each
            practice that meets its requirements
    """
    kinds_by_practice = {}

    for entry in evidence:
        if entry.classification in (Classification.PLIST,
                                    Classification.FIRSTPARTY,
                                    Classification.ENTITLEMENT):
            kinds_by_practice.setdefault(entry.practice, []).append(
                entry.classification)

    for practice, kinds in kinds_by_practice.items():
        if practice in (Practices.HOMEKIT, Practices.HEALTH):
            required = (Classification.FIRSTPARTY, Classification.PLIST,
                        Classification.ENTITLEMENT)
        elif practice == Practices.PURCHASES:
            required = (Classification.FIRSTPARTY,
                        Classification.ENTITLEMENT)
        else:
            required = (Classification.FIRSTPARTY, Classification.PLIST)

        if all(kind in kinds for kind in required):
            # USED evidence acts as a placeholder
            evidence.append(
                Evidence(str(0), practice, Used.USED,
                         Classification.FIRSTPARTY))

    return evidence
コード例 #14
0
def get_host(item, entity='all'):
    """
    Return the host organism of a UniProt entry as an Evidence object.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` may hold
            ``organismHost`` (with ``name`` and ``dbReference``)
    :param entity: accepted for a uniform getter signature; not used here
    :return: Evidence whose value is the host name, referenced to the host's
            NCBI Taxonomy id and the entry's UniProtKB accession; None when
            the entry has no ``organismHost``
    """
    entry = item['uniprot']['entry']

    if 'organismHost' not in entry:
        return None

    host = entry['organismHost']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    accession = entry['accession']
    if isinstance(accession, list):
        accession = accession[0]

    evidence = Evidence()
    evidence.value = host['name']['#text']
    evidence.add_NCBI_Taxonomy(id=host['dbReference']['@id'])
    evidence.add_UniProtKB(id=accession)
    return evidence
コード例 #15
0
def get_pdbs(item, entity='all'):
    """
    Return the PDB cross-references of a UniProt entry.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``accession`` and ``dbReference``
    :param entity: accepted for a uniform getter signature; not used here
    :return: list with one Evidence per dbReference of type 'PDB'; each has
            the PDB id as value and references to PDB and UniProtKB. Empty
            when the entry has no PDB cross-reference.
    """
    entry = item['uniprot']['entry']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    uniprot = entry['accession']
    if isinstance(uniprot, list):
        uniprot = uniprot[0]

    # A lone cross-reference is a single mapping; iterating it directly
    # would walk over its keys instead of over references.
    db_references = entry['dbReference']
    if isinstance(db_references, dict):
        db_references = [db_references]

    output = []
    for db in db_references:
        if db['@type'] != 'PDB':
            continue
        accession = db['@id']
        evidence = Evidence()
        evidence.value = accession
        evidence.add_reference({'database': 'PDB', 'id': accession})
        evidence.add_reference({'database': 'UniProtKB', 'id': uniprot})
        output.append(evidence)

    return output
コード例 #16
0
    def build(self):
        """
        Create the screen manager and register every screen of the app.

        :return: the ScreenManager holding the screens, added in the order
                main_menu, questions, evidence_unique, test, result, answers,
                evidence
        """
        manager = ScreenManager()

        # all screens are instantiated before the first one is registered
        screens = [
            MainMenu(name='main_menu'),
            Questions(name='questions'),
            EvidenceUnique(name='evidence_unique'),
            Test(name='test'),
            Result(name='result'),
            Answers(name='answers'),
            Evidence(name='evidence'),
        ]

        for screen in screens:
            manager.add_widget(screen)

        return manager
コード例 #17
0
def get_alternative_names(item, entity='all'):
    """
    Return the alternative names of a UniProt entry.

    The result starts with the recommended short name (when the entry has
    one), followed by one Evidence per ``fullName`` / ``shortName`` found in
    the entry's ``alternativeName`` records.

    :param item: parsed UniProt record
    :param entity: forwarded to ``get_uniprot``
    :return: list of Evidence objects; each alternative name carries the
            references listed in its ``@evidence`` attribute plus the first
            reference of the entry's UniProt evidence
    :raises ValueError: when ``alternativeName`` or one of its records has an
            unexpected type, when a record holds a key other than
            'fullName' / 'shortName', or when an ``@evidence`` number does
            not match the ``@key`` of the evidence record at that position
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence
    from .get_short_name import get_short_name
    from .get_uniprot import get_uniprot

    output = []
    entry = item['uniprot']['entry']

    uniprot = get_uniprot(item, entity=entity)
    ref_uniprot = uniprot.references[0]

    short_name = get_short_name(item)
    if short_name is not None:
        output.append(short_name)

    if 'alternativeName' not in entry['protein']:
        return output

    # isinstance accepts OrderedDict as well as plain dict, whichever the
    # XML parser produced; a single record is wrapped into a list.
    alternative_names = entry['protein']['alternativeName']
    if isinstance(alternative_names, dict):
        alternative_names = [alternative_names]

    if not isinstance(alternative_names, list):
        raise ValueError("alternativeName is not a list")

    for aux in alternative_names:
        if not isinstance(aux, dict):
            raise ValueError("Uknown alternativeName")

        for key, value in aux.items():
            if key not in ['fullName', 'shortName']:
                raise ValueError("Uknown alternative name type")

            evidence = Evidence()
            if isinstance(value, str):
                evidence.value = value
            else:
                evidence.value = value['#text']
                if '@evidence' in value:
                    # evidence numbers are 1-based positions in the entry's
                    # evidence list
                    for evidence_number_in_db in value['@evidence'].split():
                        evidence_in_db = entry['evidence'][
                            int(evidence_number_in_db) - 1]
                        if evidence_in_db['@key'] != evidence_number_in_db:
                            raise ValueError(
                                'Evidence number does not match evidence @key')
                        _add_reference_to_evidence(evidence, evidence_in_db)

            evidence.add_reference(ref_uniprot)
            output.append(evidence)

    return output
コード例 #18
0
ファイル: parser.py プロジェクト: N1ghtly/problog-gmm
def parse(filename):
    """
    Parse a model description file into evidence, gaussians and constraints.

    Each line starts with a one-letter tag followed by whitespace-separated
    fields:

    * ``g a b c``  - a Gaussian built from three floats
    * ``e x``      - an Evidence built from one float
    * ``c i j``    - a cannot-link constraint between evidence ``i`` and ``j``
    * ``m i j``    - a must-link constraint between evidence ``i`` and ``j``

    Constraint indices refer to the evidence read so far, so evidence lines
    must precede the constraints that use them. Blank lines and lines with
    any other tag are ignored.

    :param filename: path of the file to read
    :return: tuple ``(evidence, gaussians, mustlink, cannotlink)``
    """
    evidence = []
    gaussians = []
    mustlink = []
    cannotlink = []

    # the with-block closes the file even when a line fails to parse
    with open(filename, 'r') as handle:
        for line in handle:
            # split() drops the newline and tolerates repeated whitespace
            parts = line.split()
            if not parts:
                continue

            tag = parts[0]
            if tag == 'g':
                gaussians.append(
                    Gaussian(float(parts[1]), float(parts[2]),
                             float(parts[3])))
            elif tag == 'e':
                evidence.append(Evidence(float(parts[1])))
            elif tag == 'c':
                cannotlink.append(
                    CannotLinkConstraint(evidence[int(parts[1])],
                                         evidence[int(parts[2])]))
            elif tag == 'm':
                mustlink.append(
                    MustLinkConstraint(evidence[int(parts[1])],
                                       evidence[int(parts[2])]))

    return (evidence, gaussians, mustlink, cannotlink)
コード例 #19
0
def analyze_entitlement_line(first_party_info, num, text, file_name):
    """
    Collect the practices whose entitlement key occurs on one line.

    A practice matches when ``<key>NAME</key>`` is contained in ``text`` for
    one of its SWIFT entitlement names; one entry is produced per matching
    name.

    :param first_party_info: the dictionary of what first party code to look
            for for every privacy practice
    :param num: line number of file (not used by the search)
    :param text: text of line
    :param file_name: file name
    :return: list of UNUSED / ENTITLEMENT evidence found on this line
    """
    return [
        Evidence(file_name, practice, Used.UNUSED,
                 Classification.ENTITLEMENT)
        for (practice, practice_info) in iteritems(first_party_info)
        for entitlement in practice_info["SWIFT"]["ENTITLEMENT"]
        if "<key>" + entitlement + "</key>" in text
    ]
コード例 #20
0
def analyze_swift_line(file_line, first_party_info, file_name, first_party):
    """
    Looks for function calls and import statements in one line of Swift.

    The line (quotes and comments removed) is searched by plain substring
    matching for the classes, authorization methods, additional evidence
    items and frameworks listed for every privacy practice. Each hit yields
    an UNUSED entry whose classification is the first party variant when
    ``first_party`` is true and the ``*_THIRD`` variant otherwise.

    Authorization methods and additional evidence items map a name to a
    parameter list: an empty list produces one entry as soon as the name
    occurs, otherwise one entry is produced for each listed parameter that
    occurs on the line too. A framework only counts when the line also
    contains "import".

    :param file_line: line of Swift source code
    :param first_party_info: the dictionary of what first party code to look
            for for every privacy practice
    :param file_name: the name of the file that the line is in
    :param first_party: Boolean whether we're analyzing a first party file or not
    :return evidence: Evidence objects list with all privacy practices used in a line

    """
    found = []

    def record(practice, own_kind, third_kind):
        # pick the classification matching the origin of the file
        kind = own_kind if first_party else third_kind
        found.append(Evidence(file_name, practice, Used.UNUSED, kind))

    def scan_calls(practice, table, own_kind, third_kind):
        # table maps a call name to the parameters that must accompany it
        for name in table:
            if name not in file_line:
                continue
            parameters = table[name]
            if parameters == []:
                record(practice, own_kind, third_kind)
                continue
            for parameter in parameters:
                if parameter in file_line:
                    record(practice, own_kind, third_kind)

    for (practice, practice_info) in iteritems(first_party_info):
        for class_name in practice_info["SWIFT"]["CLASS"]:
            if class_name in file_line:  # found a constructor
                record(practice, Classification.CLASS,
                       Classification.CLASS_THIRD)

        scan_calls(practice, practice_info["SWIFT"]["AUTHORIZATION_METHOD"],
                   Classification.AUTHORIZATION,
                   Classification.AUTHORIZATION_THIRD)

        scan_calls(practice, practice_info["SWIFT"]["ADDITIONAL_EVIDENCE"],
                   Classification.METHOD, Classification.METHOD_THIRD)

        for framework in practice_info["SWIFT"]["FRAMEWORK"]:
            if "import" in file_line and framework in file_line:
                record(practice, Classification.FRAMEWORK,
                       Classification.FRAMEWORK_THIRD)

    return found
コード例 #21
0
def get_dbreference(item, entity='all', dbname=None):
    """
    Return the cross-references of a UniProt entry to one external database.

    :param item: parsed UniProt record; ``item['uniprot']['entry']`` must
            hold ``accession`` and ``dbReference``
    :param entity: accepted for a uniform getter signature; not used here
    :param dbname: ``@type`` of the dbReference records to collect
    :return: None when no record matches, a single Evidence when exactly one
            matches, otherwise a list of Evidence objects. Each Evidence has
            the external id as value and references to the external
            database, to the sources listed in ``@evidence`` and to UniProtKB.
    :raises ValueError: when a record matches ``dbname`` but the database is
            not one of the supported ones, or when an ``@evidence`` number
            does not match the ``@key`` of the evidence record at that
            position
    """
    from ._add_reference_to_evidence import _add_reference_to_evidence

    # dbReference '@type' -> database label stored in the reference
    reference_database = {
        'ChEMBL': 'ChEMBL',
        'EC': 'EC',
        'DIP': 'DIP',
        'ELM': 'ELM',
        'IntAct': 'IntAct',
        'BindingDB': 'BindingDB',
        'BioGRID': 'BioGRID',
        'iPTMnet': 'iPTMnet',
        'MINT': 'MINT',
        'PhosphoSitePlus': 'PhosphoSitePlus',
        'ProDom': 'ProDom',
        'ProteinModelPortal': 'ProteinModelPortal',
        'STRING': 'STRING',
        'SMR': 'SwissModel',
    }

    entry = item['uniprot']['entry']

    # The accession may be a single string instead of a list (see
    # get_uniprot_id); indexing a string would keep only its first character.
    uniprot = entry['accession']
    if isinstance(uniprot, list):
        uniprot = uniprot[0]

    # A lone cross-reference is a single mapping; iterating it directly
    # would walk over its keys instead of over references.
    db_references = entry['dbReference']
    if isinstance(db_references, dict):
        db_references = [db_references]

    output = []

    for db in db_references:
        if db['@type'] != dbname:
            continue

        # an unsupported database is only reported once a record matches
        if dbname not in reference_database:
            raise ValueError('Database name not recognized')

        accession = db['@id']
        evidence = Evidence()
        evidence.value = accession
        evidence.add_reference(
            {'database': reference_database[dbname], 'id': accession})

        if '@evidence' in db:
            # evidence numbers are 1-based positions in the entry's list
            for evidence_number_in_db in db['@evidence'].split():
                evidence_in_db = entry['evidence'][
                    int(evidence_number_in_db) - 1]
                if evidence_in_db['@key'] != evidence_number_in_db:
                    raise ValueError(
                        'Evidence number does not match evidence @key')
                _add_reference_to_evidence(evidence, evidence_in_db)

        evidence.add_reference({'database': 'UniProtKB', 'id': uniprot})
        output.append(evidence)

    if len(output) > 1:
        return output
    if len(output) == 1:
        return output[0]
    return None
コード例 #22
0
ファイル: tsebra.py プロジェクト: LarsGab/TSEBRA
def main():
    """
        Overview:

        1. Read gene predictions from .gtf files.
        2. Read Evidence from .gff files.
        3. Detect overlapping transcripts.
        4. Create feature vector (for a list of all features see features.py)
           for all transcripts.
        5. Compare the feature vectors of all pairs of overlapping transcripts.
        6. Exclude transcripts based on the 'transcript comparison rule' and 5.
        7. Remove Transcripts with low evidence support.
        8. Create combined gene predictions (all transcripts that weren't excluded).
    """

    from genome_anno import Anno
    from overlap_graph import Graph
    from evidence import Evidence

    global anno, graph, parameter

    def progress(template, *values):
        # progress messages go to stderr unless quiet mode is on
        if not quiet:
            sys.stderr.write(template.format(*values))

    args = parseCmd()
    init(args)

    if v > 0:
        print(gtf)

    # read gene prediction files; annotations are numbered from 1
    for number, gtf_path in enumerate(gtf, start=1):
        progress('### READING GENE PREDICTION: [{}]\n', gtf_path)
        prediction = Anno(gtf_path, 'anno{}'.format(number))
        anno.append(prediction)
        prediction.addGtf()
        prediction.norm_tx_format()

    # read hintfiles
    evi = Evidence()
    for hintfile in hintfiles:
        progress('### READING EXTRINSIC EVIDENCE: [{}]\n', hintfile)
        evi.add_hintfile(hintfile)

    # every evidence source needs a weight; missing ones default to 1
    for src in evi.src:
        if src not in parameter.keys():
            sys.stderr.write('ConfigError: No weight for src={}, it is set to 1\n'.format(src))
            parameter.update({src : 1})

    # create graph with an edge for each unique transcript
    # and an edge if two transcripts overlap
    # two transcripts overlap if they share at least 3 adjacent protein coding nucleotides
    graph = Graph(anno, para=parameter, verbose=v)
    progress('### BUILD OVERLAP GRAPH\n')
    graph.build()

    # add features
    progress('### ADD FEATURES TO TRANSCRIPTS\n')
    graph.add_node_features(evi)

    # apply decision rule to exclude a set of transcripts
    progress('### SELECT TRANSCRIPTS\n')
    combined_prediction = graph.get_decided_graph()

    if v > 0:
        sys.stderr.write(str(combined_prediction.keys()) + '\n')
        for annotation in anno:
            sys.stderr.write('Numb_tx in {}: {}\n'.format(
                annotation.id, len(combined_prediction[annotation.id])))

    # write result to output file
    progress('### WRITE COMBINED GENE PREDICTION\n')
    combined_gtf = []
    for annotation in anno:
        combined_gtf.extend(
            annotation.get_subset_gtf(combined_prediction[annotation.id]))
    with open(out, 'w+') as file:
        out_writer = csv.writer(file, delimiter='\t', quotechar = "'")
        out_writer.writerows(combined_gtf)

    progress('### FINISHED\n\n')
    progress('### The combined gene prediciton is located at {}.\n', out)
コード例 #23
0
    "The Ultimate Diplomat", classroom,
    "We needn't worry. Rationality will set us free. You of all people understand that, I'm sure."
)
# Cast members. Each Character is built from three positional values; judging
# by the data they look like a title, a room object and a line of dialogue
# (NOTE(review): confirm against the Character constructor).
fencer = Character(
    "The Ultimate Fencer", classroom,
    "No time for games. We need to break out before someone cracks.")
hunter = Character(
    "The Ultimate Hunter", lab,
    "Yo, want to come hang out with me? No point in worrying about all this. I trust you guys."
)
priestess = Character(
    "The Ultimate Priestess", lab,
    "I must have faith that we can escape from this prison. How Job-like!")

# Pieces of evidence. Each Evidence is built from three positional values;
# judging by the data they look like a name, a description and a room object
# (NOTE(review): confirm against the Evidence constructor).
autopsy = Evidence(
    "Autopsy Report",
    "The victim was the Ultimate Pharmacologist. The fatal wound was a blow to the back of the head, which killed him instantly. His time of death was approximately 11 PM last night. No traces of poison were found in his system, though the powder spilled all over him was a lethal dose.",
    classroom)
poison_powder = Evidence(
    "Poison Powder",
    "Indeed, some form of powder is spilled all over him, covering his upper chest region and some parts of his arms. The floor under him is also covered in the same powder. There is one small region where it looks like something hit the powder, but it’s no larger than a quarter, and there is no poison on anything else in the room.",
    classroom)
glass_bottle = Evidence(
    "Glass Bottle",
    "The glass bottle was found underneath the victim at the time he was discovered, which means that the bottle was on the floor before him. In addition, the bottle was still corked, meaning that before he fell and broke it, it was not harmful. This also means the spread of powder across his chest must have happened after he died.",
    classroom)
broken_neck = Evidence(
    "Broken Neck",
    "His head wound appears to have been caused by a fairly small object, actually, considering the size of the wound, but in addition his neck looks broken. Could this be a separate wound? Why would the killer break his neck after his death?",
    classroom)
sculptor_account = Evidence(
    "sculptor's account",
コード例 #24
0
def test_get_hint():
    """The intron hint 3R:801-899 (+) in hint3.gff has a summed value of 28."""
    hints = Evidence()
    hints.add_hintfile(testDir + '/evidence/hint3.gff')
    multiplicities = hints.get_hint('3R', '801', '899', 'intron', '+')
    total = sum(multiplicities.values())
    assert total == 28