# Output naming options read from `args`.
folder = args.output_folder
file_suffix = args.output_file_suffix

# ----------------------------- SETUP ----------------------------------- #
# Timestamp of this run with spaces, colons and dots replaced by dashes,
# so that it can be embedded in the results directory name below.
date = str(datetime.now())
date = date.replace(" ", "-")
date = date.replace(":", "-")
date = date.replace(".", "-")

# NOTE(review): `log` is not closed within this section; presumably it is
# written to and closed further down the script -- confirm.
log = open("tmp/training_log.txt", "w")
dag = load_go_dag('data/gene_ontology.1_2.obo')

# Only the two supported vectorizer names are accepted; anything else aborts
# the run with a non-zero exit status.
if vectorizer_method not in ['count', 'tf-idf']:
    print('Vectorizer Method must select from: count | tf-idf')
    sys.exit(1)

# Without an explicit folder name, create a uniquely named directory under
# results/; otherwise use results/<folder>-<date>.
if not folder:
    direc = tempfile.mkdtemp(prefix='{}-{}-'.format(method, date), dir='results/')
else:
    direc = 'results/{}-{}'.format(folder, date)
    su_make_dir(direc)

ontologies = []

# Feature sources enabled by the `pfam` / `ipr` flags, in that order.
selection = (['pfam'] if pfam else []) + (['ipr'] if ipr else [])

# `go_type` stays None unless `go` is set; `induce` then selects the
# induced variant.
if go:
    go_type = 'induced_go' if induce else 'go'
else:
    go_type = None
labels = sorted(prep.get_labels_from_file('data/labels.tsv')) train, _ = prep.prep_data_frames(selection=[]) uniprots = list(train['uniprot'].values) try: interactome_fp = open(supplied_file, 'r') except IOError: print("Could not open supplied file {}.".format(supplied_file)) sys.exit(0) if label.lower() not in labels: print("Could not recognise label {}.".format(label)) sys.exit(0) else: label = label.lower() su_make_dir('{}/{}_{}'.format(out_dir, label, threshold)) out_dir = '{}/{}_{}'.format(out_dir, label, threshold) # Go through interactome and get all reactions with label header = interactome_fp.readline().strip().split('\t') protein_a_idx = header.index('p1') protein_b_idx = header.index('p2') gene_a_idx = header.index('g1') gene_b_idx = header.index('g2') num_annot = header.index('n_annot') label_idx = header.index(label) reactions = [] reaction_info = [ ('p1', protein_a_idx), ('p2', protein_b_idx),