Example #1
0
    folder = args.output_folder
    file_suffix = args.output_file_suffix

    # Reject an unsupported vectorizer before any of the side effects below:
    # opening the log in "w" mode truncates the previous log, and loading the
    # ontology is wasted work if the run is about to abort.
    if vectorizer_method not in ('count', 'tf-idf'):
        print('Vectorizer Method must select from: count | tf-idf')
        sys.exit(1)

    # ----------------------------- SETUP ----------------------------------- #
    # Current timestamp with spaces, colons and dots replaced by dashes; it is
    # used below to build the name of the results directory.
    date = str(datetime.now()).replace(" ", "-").replace(":", '-').replace('.', '-')
    # NOTE(review): `log` is not closed anywhere in this excerpt -- confirm
    # that later code closes it.
    log = open("tmp/training_log.txt", "w")
    dag = load_go_dag('data/gene_ontology.1_2.obo')

    # With an output folder, results go to 'results/<folder>-<date>';
    # otherwise a uniquely named directory is created under 'results/'.
    if folder:
        direc = 'results/{}-{}'.format(folder, date)
        su_make_dir(direc)
    else:
        direc = tempfile.mkdtemp(prefix='{}-{}-'.format(method, date), dir='results/')

    # Collect 'pfam' / 'ipr' into `selection` when the matching flag is truthy.
    selection = []
    ontologies = []
    if pfam:
        selection.append('pfam')
    if ipr:
        selection.append('ipr')

    # 'induced_go' when both `go` and `induce` are set, 'go' when only `go`
    # is set, None when `go` is not set.
    go_type = None
    if go:
        go_type = 'induced_go' if induce else 'go'
Example #2
0
    labels = sorted(prep.get_labels_from_file('data/labels.tsv'))
    train, _ = prep.prep_data_frames(selection=[])
    uniprots = list(train['uniprot'].values)

    # Bad input aborts with a non-zero exit status so that a calling shell or
    # pipeline can tell the run failed.
    # NOTE(review): `interactome_fp` is not closed anywhere in this excerpt --
    # confirm that later code closes it.
    try:
        interactome_fp = open(supplied_file, 'r')
    except IOError:
        print("Could not open supplied file {}.".format(supplied_file))
        sys.exit(1)

    # The supplied label is lower-cased before being looked up in the known
    # labels; the error message reports it exactly as supplied.
    normalised_label = label.lower()
    if normalised_label not in labels:
        print("Could not recognise label {}.".format(label))
        sys.exit(1)
    label = normalised_label

    # Build the '<out_dir>/<label>_<threshold>' path once, create it, and keep
    # it as the new `out_dir`.
    out_dir = '{}/{}_{}'.format(out_dir, label, threshold)
    su_make_dir(out_dir)

    # Go through interactome and get all reactions with label
    # The first line is read as a tab-separated header; each `index` call
    # raises ValueError if the named column is absent.
    header = interactome_fp.readline().strip().split('\t')
    protein_a_idx = header.index('p1')
    protein_b_idx = header.index('p2')
    gene_a_idx = header.index('g1')
    gene_b_idx = header.index('g2')
    num_annot = header.index('n_annot')
    label_idx = header.index(label)

    reactions = []
    reaction_info = [
        ('p1', protein_a_idx),
        ('p2', protein_b_idx),