def load_data(parent_id, go_id):
    """Load a class-balanced, one-hot encoded dataset for one GO term.

    Reads ``DATA_ROOT + parent_id + '/' + go_id + '.txt'``. Every line is
    split on single spaces: field 0 is the integer label and field 2 is the
    sequence, which is cut to ``MAXLEN`` characters. Lines labelled ``1`` are
    positives; every other label is treated as a negative.

    Negatives are shuffled with ``shuffle(..., seed=0)`` and trimmed so there
    are at most as many negatives as positives. The combined samples are
    encoded with ``encode_seq_one_hot`` and shuffled together with their
    labels.

    Returns:
        ``(labels, data)`` as numpy arrays; ``data`` is built with
        ``dtype='float32'``, ``labels`` holds 0 for negatives and 1 for
        positives.
    """
    path = DATA_ROOT + parent_id + '/' + go_id + '.txt'
    pos_seqs = []
    neg_seqs = []
    with open(path) as handle:
        for row in handle:
            fields = row.strip().split(' ')
            is_positive = int(fields[0]) == 1
            sequence = fields[2][:MAXLEN]
            (pos_seqs if is_positive else neg_seqs).append(sequence)

    # Down-sample the negatives to the size of the positive set.
    shuffle(neg_seqs, seed=0)
    neg_seqs = neg_seqs[:len(pos_seqs)]

    labels = [0] * len(neg_seqs) + [1] * len(pos_seqs)
    data = [
        encode_seq_one_hot(sequence, maxlen=MAXLEN)
        for sequence in neg_seqs + pos_seqs
    ]
    shuffle(data, labels, seed=0)
    return numpy.array(labels), numpy.array(data, dtype='float32')
def load_data(go_id):
    """Load a balanced two-view dataset (one-hot and hydro encodings).

    Reads ``DATA_ROOT + go_id + '.txt'``. Every line is split on single
    spaces: field 0 is the integer label and field 2 is the sequence, cut to
    ``MAXLEN`` characters. Each sequence is encoded twice, with
    ``encode_seq_hydro`` and ``encode_seq_one_hot``. Lines labelled ``1`` are
    positives; every other label is a negative.

    Negatives are shuffled with ``shuffle(..., seed=0)`` and trimmed to at
    most the number of positives before being combined with the positives.

    Returns:
        ``(labels, (onehot, hydro))``: ``labels`` is a float32 numpy array
        with 0 for negatives and 1 for positives; ``onehot`` and ``hydro``
        are float32 numpy arrays in the same sample order as ``labels``.
    """
    pos_onehot, pos_hydro = [], []
    neg_onehot, neg_hydro = [], []
    with open(DATA_ROOT + go_id + '.txt') as f:
        for line in f:
            fields = line.strip().split(' ')
            label = int(fields[0])
            seq = fields[2][:MAXLEN]
            hydro = encode_seq_hydro(seq, maxlen=MAXLEN)
            onehot = encode_seq_one_hot(seq, maxlen=MAXLEN)
            if label == 1:
                pos_onehot.append(onehot)
                pos_hydro.append(hydro)
            else:
                neg_onehot.append(onehot)
                neg_hydro.append(hydro)

    shuffle(neg_onehot, neg_hydro, seed=0)
    n_pos = len(pos_onehot)
    neg_onehot = neg_onehot[:n_pos]
    neg_hydro = neg_hydro[:n_pos]

    data1 = neg_onehot + pos_onehot
    data2 = neg_hydro + pos_hydro
    # Count the negatives that were actually kept. Using the pre-truncation
    # count would produce more labels than samples and mislabel positives
    # whenever the file holds more negatives than positives.
    labels = [0] * len(neg_onehot) + [1] * n_pos
    shuffle(data1, data2, labels, seed=0)

    data = (
        numpy.array(data1, dtype='float32'),
        numpy.array(data2, dtype='float32'),
    )
    return (numpy.array(labels, dtype='float32'), data)
def load_data(go_id):
    """Build a class-balanced dataset with one-hot and hydro views per sample.

    The file ``DATA_ROOT + go_id + '.txt'`` is read line by line. Lines are
    split on single spaces; field 0 is parsed as the integer label and
    field 2 is the sequence, truncated to ``MAXLEN`` characters. Both
    ``encode_seq_hydro`` and ``encode_seq_one_hot`` are applied to each
    sequence. A label of ``1`` marks a positive; anything else is a negative.

    The negatives are shuffled (``seed=0``) and at most ``len(positives)`` of
    them are kept. Samples and labels are then shuffled together
    (``seed=0``).

    Returns:
        A tuple ``(labels, (onehot, hydro))`` of float32 numpy arrays, where
        ``labels`` contains 0 for each kept negative and 1 for each positive.
    """
    positives = {'onehot': [], 'hydro': []}
    negatives = {'onehot': [], 'hydro': []}
    with open(DATA_ROOT + go_id + '.txt') as f:
        for line in f:
            parts = line.strip().split(' ')
            label = int(parts[0])
            seq = parts[2][:MAXLEN]
            hydro = encode_seq_hydro(seq, maxlen=MAXLEN)
            onehot = encode_seq_one_hot(seq, maxlen=MAXLEN)
            bucket = positives if label == 1 else negatives
            bucket['onehot'].append(onehot)
            bucket['hydro'].append(hydro)

    shuffle(negatives['onehot'], negatives['hydro'], seed=0)
    n_pos = len(positives['onehot'])
    kept_neg_onehot = negatives['onehot'][:n_pos]
    kept_neg_hydro = negatives['hydro'][:n_pos]

    data1 = kept_neg_onehot + positives['onehot']
    data2 = kept_neg_hydro + positives['hydro']
    # Labels must match the truncated negative list, not the full one;
    # otherwise labels outnumber samples and no longer line up with them.
    labels = [0] * len(kept_neg_onehot) + [1] * n_pos
    shuffle(data1, data2, labels, seed=0)

    data = (
        numpy.array(data1, dtype='float32'),
        numpy.array(data2, dtype='float32'),
    )
    return (numpy.array(labels, dtype='float32'), data)
def load_data(parent_id, go_id):
    """Load a multi-label dataset and widen the global ``nb_classes``.

    Reads ``DATA_ROOT + parent_id + '/' + go_id + '.txt'``. Every line is
    split on tabs: field 1 is the sequence (cut to ``MAXLEN`` characters) and
    field 2 is a ``|``-separated list of integer class indices.

    Side effects:
        The module-level ``nb_classes`` is raised to the largest class index
        seen and then incremented by one.
        NOTE(review): the increment happens on every call, so repeated calls
        keep growing ``nb_classes`` -- confirm this is intended.

    Returns:
        ``(labels, data)`` as float32 numpy arrays. Each label row is a
        multi-hot vector of length ``nb_classes``; ``data`` holds the
        ``encode_seq_one_hot`` encoding of each sequence. Both are shuffled
        together with ``shuffle(..., seed=0)``.
    """
    global nb_classes
    sequences = []
    class_ids_per_sample = []
    with open(DATA_ROOT + parent_id + '/' + go_id + '.txt') as handle:
        for row in handle:
            fields = row.strip().split('\t')
            sequence = fields[1][:MAXLEN]
            tokens = fields[2].split('|')
            sequences.append(sequence)
            class_ids = []
            for token in tokens:
                class_id = int(token)
                class_ids.append(class_id)
                if class_id > nb_classes:
                    nb_classes = class_id
            class_ids_per_sample.append(class_ids)

    # Turn the largest index into a vector length.
    nb_classes += 1

    labels = []
    for class_ids in class_ids_per_sample:
        multi_hot = [0] * nb_classes
        for class_id in class_ids:
            multi_hot[class_id] = 1
        labels.append(multi_hot)

    data = [encode_seq_one_hot(sequence, maxlen=MAXLEN) for sequence in sequences]
    shuffle(data, labels, seed=0)
    return (
        numpy.array(labels, dtype='float32'),
        numpy.array(data, dtype='float32'),
    )
def predict_functions(classifier, seq):
    """Walk the term hierarchy breadth-first and collect predicted term ids.

    Starting from ``classifier`` (a node with ``'children'`` and ``'id'``
    keys), each child that has a truthy ``'model'`` entry in ``go`` is asked
    to classify the encoded sequence. Children whose model predicts class 1
    are queued for further expansion. A node is reported when none of its
    children were predicted positive.

    There is no visited set: a node queued via several parents is expanded,
    and may be reported, once per path.

    Args:
        classifier: Root node to start from.
        seq: Sequence passed to ``encode_seq_one_hot``.

    Returns:
        List of ``'id'`` values of the reported nodes, in visiting order.
    """
    encoded = numpy.array([encode_seq_one_hot(seq, maxlen=MAXLEN)])
    pending = deque([classifier])
    predicted_ids = []

    while pending:
        node = pending.popleft()
        no_positive_child = True
        for child_id in node['children']:
            child = go[child_id]
            if 'model' not in child or not child['model']:
                continue
            classes = child['model'].predict_classes(
                encoded, batch_size=1, verbose=0)
            if classes[0][0] == 1:
                no_positive_child = False
                pending.append(child)
        if no_positive_child:
            predicted_ids.append(node['id'])

    return predicted_ids
def predict_functions(classifier, seq):
    """Return ids of the deepest terms whose models accept ``seq``.

    Performs a breadth-first traversal from ``classifier``. For every child
    id listed under the current node's ``'children'`` key, the matching entry
    in ``go`` is consulted; if it carries a truthy ``'model'``, that model's
    ``predict_classes`` is called on the one-hot encoded sequence. A predicted
    class of 1 queues the child for expansion. Nodes with no positively
    predicted child have their ``'id'`` appended to the result.

    Nodes are not de-duplicated, so a term reached through more than one
    positive parent is processed once per parent.

    Args:
        classifier: Starting node of the traversal.
        seq: Raw sequence to encode with ``encode_seq_one_hot``.

    Returns:
        A list of node ids in the order they were reported.
    """
    queue = deque()
    queue.append(classifier)
    batch = numpy.array([encode_seq_one_hot(seq, maxlen=MAXLEN)])
    result = []

    while len(queue) != 0:
        current = queue.popleft()
        positive_children = 0
        for child_id in current['children']:
            entry = go[child_id]
            has_model = 'model' in entry and entry['model']
            if has_model:
                model = entry['model']
                prediction = model.predict_classes(
                    batch, batch_size=1, verbose=0)
                if prediction[0][0] == 1:
                    positive_children += 1
                    queue.append(entry)
        if positive_children == 0:
            result.append(current['id'])

    return result
def load_data(parent_id, go_id):
    """Load a balanced dataset with two layouts of the one-hot encoding.

    Reads ``DATA_ROOT + parent_id + '/' + go_id + '.txt'``. Every line is
    split on single spaces: field 0 is the integer label and field 2 is the
    sequence, cut to ``MAXLEN`` characters and encoded with
    ``encode_seq_one_hot``. Lines labelled ``1`` are positives; every other
    label is a negative.

    The first view stores each encoding wrapped in a one-element list (adding
    a leading axis of length 1 per sample); the second stores it as is.
    Negatives are shuffled with ``shuffle(..., seed=0)``, then both classes
    are cut to the size of the smaller one.

    The five extra ``encode_seq`` encodings that used to be computed for
    every line were never used and have been removed.

    Returns:
        ``(labels, (data1, data2))``: ``labels`` is a numpy array of 0.0/1.0
        values; ``data1`` and ``data2`` are float32 numpy arrays in the same
        sample order.
    """
    pos_wrapped, pos_plain = [], []
    neg_wrapped, neg_plain = [], []
    with open(DATA_ROOT + parent_id + '/' + go_id + '.txt') as f:
        for line in f:
            fields = line.strip().split(' ')
            label = int(fields[0])
            onehot = encode_seq_one_hot(fields[2][:MAXLEN], maxlen=MAXLEN)
            if label == 1:
                pos_wrapped.append([onehot])
                pos_plain.append(onehot)
            else:
                neg_wrapped.append([onehot])
                neg_plain.append(onehot)

    shuffle(neg_wrapped, neg_plain, seed=0)
    # Balance the classes on the smaller of the two.
    n = min(len(pos_wrapped), len(neg_wrapped))
    data1 = neg_wrapped[:n] + pos_wrapped[:n]
    data2 = neg_plain[:n] + pos_plain[:n]
    labels = [0.0] * n + [1.0] * n  # Previous was 30
    shuffle(data1, data2, labels, seed=0)

    data = (
        numpy.array(data1, dtype='float32'),
        numpy.array(data2, dtype='float32'),
    )
    return (numpy.array(labels), data)
def load_data(parent_id, go_id):
    """Build a class-balanced dataset of one-hot encoded sequences.

    The file ``DATA_ROOT + parent_id + '/' + go_id + '.txt'`` is read line by
    line. Lines are split on single spaces; field 0 is parsed as the integer
    label and field 2 is the sequence, truncated to ``MAXLEN`` characters
    before being passed to ``encode_seq_one_hot``. A label of ``1`` marks a
    positive; anything else is a negative.

    Two parallel views are produced: ``data1`` holds each encoding wrapped in
    a single-element list, ``data2`` holds the bare encoding. Negatives are
    shuffled (``seed=0``) and both classes are trimmed to the smaller class
    size before the final joint shuffle (``seed=0``).

    Earlier revisions also computed five ``encode_seq`` encodings per line
    and discarded them; that dead work is gone.

    Returns:
        A tuple ``(labels, (data1, data2))``. ``labels`` is a numpy array of
        0.0 (negative) and 1.0 (positive); the data arrays are float32.
    """
    wrapped = {True: [], False: []}
    plain = {True: [], False: []}
    with open(DATA_ROOT + parent_id + '/' + go_id + '.txt') as f:
        for line in f:
            parts = line.strip().split(' ')
            is_positive = int(parts[0]) == 1
            encoded = encode_seq_one_hot(parts[2][:MAXLEN], maxlen=MAXLEN)
            wrapped[is_positive].append([encoded])
            plain[is_positive].append(encoded)

    shuffle(wrapped[False], plain[False], seed=0)
    # Keep the same number of samples from each class.
    n = min(len(wrapped[True]), len(wrapped[False]))
    data1 = wrapped[False][:n] + wrapped[True][:n]
    data2 = plain[False][:n] + plain[True][:n]
    labels = [0.0] * n + [1.0] * n  # Previous was 30
    shuffle(data1, data2, labels, seed=0)

    data = (
        numpy.array(data1, dtype='float32'),
        numpy.array(data2, dtype='float32'),
    )
    return (numpy.array(labels), data)
def load_data(go_id):
    """Load a balanced dataset with one-hot and MEHP950102 encodings.

    Reads ``DATA_ROOT + '/' + go_id + '.txt'``. Every line is split on single
    spaces: field 0 is the integer label and field 2 is the sequence, cut to
    ``MAXLEN`` characters. Two encodings are built per sequence:

    * ``encode_seq_one_hot(seq, maxlen=MAXLEN)``;
    * one ``MEHP950102[AAINDEX[residue]]`` row per residue, padded with rows
      of twenty ``0.0`` values until it has ``MAXLEN`` rows.

    Negatives are shuffled, both classes are cut to the size of the smaller
    one, and samples and labels are shuffled together. Unlike sibling
    loaders, ``shuffle`` is called here without a ``seed`` argument.

    Returns:
        A 4-tuple ``(labels, onehot, mehp, onehot_again)`` of numpy arrays.
        The three data arrays are float32; the last one is a second array
        built from the same one-hot data as the second element.
    """
    pos_onehot, pos_mehp = [], []
    neg_onehot, neg_mehp = [], []
    with open(DATA_ROOT + '/' + go_id + '.txt') as handle:
        for row in handle:
            fields = row.strip().split(' ')
            label = int(fields[0])
            sequence = fields[2][:MAXLEN]
            onehot = encode_seq_one_hot(sequence, maxlen=MAXLEN)
            mehp = [MEHP950102[AAINDEX[residue]] for residue in sequence]
            # Pad with all-zero rows up to MAXLEN.
            mehp.extend([0.0] * 20 for _ in range(MAXLEN - len(mehp)))
            if label == 1:
                pos_onehot.append(onehot)
                pos_mehp.append(mehp)
            else:
                neg_onehot.append(onehot)
                neg_mehp.append(mehp)

    shuffle(neg_onehot, neg_mehp)
    n = min(len(pos_onehot), len(neg_onehot))
    data1 = neg_onehot[:n] + pos_onehot[:n]
    data2 = neg_mehp[:n] + pos_mehp[:n]
    labels = [0.0] * n + [1.0] * n  # Previous was 30
    shuffle(data1, data2, labels)

    labels_arr = numpy.array(labels)
    onehot_arr = numpy.array(data1, dtype='float32')
    mehp_arr = numpy.array(data2, dtype='float32')
    onehot_arr_again = numpy.array(data1, dtype='float32')
    return (labels_arr, onehot_arr, mehp_arr, onehot_arr_again)
def load_data(go_id):
    """Load a class-balanced, one-hot encoded dataset for one GO term.

    Reads ``DATA_ROOT + go_id + '.txt'``. Every line is split on single
    spaces: field 0 is the integer label and field 2 is the sequence, which
    is cut to 500 characters and encoded with
    ``encode_seq_one_hot(..., maxlen=MAXLEN)``. Lines labelled ``1`` are
    positives; every other label is a negative.

    Negatives are shuffled with ``shuffle(..., seed=0)`` and at most
    ``len(positives)`` of them are kept. Samples and labels are then shuffled
    together with the same seed.

    Returns:
        ``(labels, data)`` as numpy arrays. ``labels`` holds 0.0 for each
        kept negative and 1.0 for each positive; ``data`` is float32.
    """
    positive = []
    negative = []
    with open(DATA_ROOT + go_id + '.txt') as f:
        for line in f:
            fields = line.strip().split(' ')
            label = int(fields[0])
            # NOTE(review): the slice uses a literal 500 while the encoder is
            # given MAXLEN -- confirm the two are meant to differ.
            seq = encode_seq_one_hot(fields[2][:500], maxlen=MAXLEN)
            if label == 1:
                positive.append(seq)
            else:
                negative.append(seq)

    shuffle(negative, seed=0)
    n = len(positive)
    kept_negative = negative[:n]
    data = kept_negative + positive
    # Count the negatives actually kept: when the file has fewer negatives
    # than positives, assuming ``n`` of them would yield more labels than
    # samples and misalign the two lists.
    labels = [0.0] * len(kept_negative) + [1.0] * n  # Previous was 30
    shuffle(data, labels, seed=0)
    return numpy.array(labels), numpy.array(data, dtype="float32")