def generateNDTAlign(tplname, tgtname, distname, observations, transitions,
                     init_alignments_path):
    """Align one template/target pair and score it with the NDT routines.

    Args:
        tplname: Template file name, resolved relative to ``args.t``.
        tgtname: Target (query) file name, resolved relative to ``args.q``.
        distname: Distance-potential file name, resolved relative to
            ``args.d``.
        observations: Tensor whose first dimension is used as the model
            count (``observations.size(0)``) and is indexed by
            ``sequence.bestobs``.
        transitions: Tensor indexed along its first dimension in the same
            way as ``observations``.
        init_alignments_path: Kept for backward compatibility only.  The
            previous implementation built a list of FASTA paths from it
            but never read that list, so it had no effect on the result.

    Returns:
        ``[template name, target name, output, alignment_out]`` where
        ``output`` comes from ``sequence.get_RNDToutput`` and
        ``alignment_out`` from ``sequence.get_alignment_output``.
    """
    tpl = load_tpl(os.path.join(args.t, tplname))
    # NOTE(review): the suffix test is applied to ``args.q`` (used here as
    # the parent path of ``tgtname``), not to ``tgtname`` itself -- confirm
    # that this is the intended way to select the loader.
    if args.q.endswith('.hhm') or args.q.endswith('.hhm.pkl'):
        tgt = load_hhm(os.path.join(args.q, tgtname))
    else:
        tgt = load_tgt(os.path.join(args.q, tgtname))
    tgtseq = tgt['sequence']
    tplseq = tpl['sequence']

    model_size = observations.size(0)
    sequence = Batchpair(tpl['name'], tgt['name'], tplseq, tgtseq, tpl,
                         model_size)

    # Use the stored Cb-Cb matrix when the template carries one; otherwise
    # compute it, mirroring what compute_alignment() does, instead of
    # failing with a KeyError.
    if 'atomDistMatrix' in tpl:
        dis = tpl['atomDistMatrix']['CbCb']
    else:
        dis = Compute_CbCb_distance_matrix(tpl)
    dis = torch.from_numpy(dis).float()
    # Negative entries are replaced by a large constant (10000).
    dis = torch.where(torch.lt(dis, 0), torch.ones(dis.size()) * 10000, dis)
    sequence.set_dismatrix(dis)

    pair_dis, disc_method, _ = Load_EdgeScore(
        os.path.join(args.d, distname), tgt)
    pair_dis = torch.from_numpy(pair_dis)
    disc_method = torch.from_numpy(disc_method)

    alignments = sequence.alignment_init(observations, transitions, args.a)
    node_score, edge_score, norm_score = sequence.compute_NDT_score(
        alignments, observations, transitions, pair_dis, disc_method)
    sequence.update4NDT(alignments, node_score, edge_score, norm_score)

    alignment = sequence.maxalign
    output = sequence.get_RNDToutput(
        alignment,
        observations[sequence.bestobs].unsqueeze(0),
        transitions[sequence.bestobs].unsqueeze(0),
        pair_dis, disc_method, 'dist', 1)
    alignment_out = sequence.get_alignment_output()
    return [tpl['name'], tgt['name'], output, alignment_out]
def generateAlign(tplname, tgtname, observations, transitions):
    """Align a template against the query using model-averaged scores.

    NOTE(review): another ``generateAlign`` with the same signature is
    defined later in this file; that later definition rebinds the name, so
    this version is not the one callers reach after the module is loaded.

    Args:
        tplname: Template file name, resolved relative to ``args.t``.
        tgtname: Not used by this implementation; the query is loaded
            directly from ``args.q``.
        observations: Tensor averaged over its first dimension.
        transitions: Tensor averaged over its first dimension.

    Returns:
        ``[template name, target name, alignment_output, output]``.
    """
    tpl = load_tpl(os.path.join(args.t, tplname))

    # Pick the loader from the query file suffix.
    query_loader = (load_hhm if args.q.endswith(('.hhm', '.hhm.pkl'))
                    else load_tgt)
    tgt = query_loader(args.q)

    sequence = Batchpair(tpl['name'], tgt['name'],
                         tpl['sequence'], tgt['sequence'], tpl)

    # Collapse the per-model scores into a single averaged model.
    mean_obs = observations.mean(0, keepdim=True)
    mean_trans = transitions.mean(0, keepdim=True)

    first_alignment = sequence.alignment_init(mean_obs, mean_trans, args.a)[0]
    output = sequence.get_CNF_output(first_alignment, mean_obs, mean_trans,
                                     args.a)
    sequence.maxalign = first_alignment
    return [tpl['name'], tgt['name'], sequence.get_alignment_output(), output]
def generateAlign(tplname, tgtname, observations, transitions):
    """Produce one initial alignment per model for a template/target pair.

    NOTE(review): an earlier ``generateAlign`` with the same signature
    appears above in this file; this definition replaces it.

    Args:
        tplname: Template file name, resolved relative to ``args.t``.
        tgtname: Target file name, resolved relative to ``args.q``.
        observations: Tensor whose first dimension gives the model count.
        transitions: Tensor passed alongside ``observations`` to
            ``alignment_init``.

    Returns:
        ``[template name, target name, alignments_output]`` where
        ``alignments_output`` holds one ``alignment_output(...)`` result
        per index in ``range(sequence.batchsize)``.
    """
    tpl = load_tpl(os.path.join(args.t, tplname))

    # NOTE(review): the suffix is tested on ``args.q`` (used as the parent
    # path), not on ``tgtname`` -- confirm this is intended.
    query_loader = (load_hhm if args.q.endswith(('.hhm', '.hhm.pkl'))
                    else load_tgt)
    tgt = query_loader(os.path.join(args.q, tgtname))

    tpl_seq = tpl['sequence']
    tgt_seq = tgt['sequence']
    n_models = observations.size(0)
    sequence = Batchpair(tpl['name'], tgt['name'], tpl_seq, tgt_seq, tpl,
                         n_models)

    alignments = sequence.alignment_init(observations, transitions, args.a)
    alignments_output = [
        alignment_output(tpl['name'], tgt['name'], tpl['sequence'],
                         tgt['sequence'], alignments[idx])
        for idx in range(sequence.batchsize)
    ]
    return [tpl['name'], tgt['name'], alignments_output]
def compute_alignment(tplname, observations, transitions, pair_dis,
                      disc_method, iteration, edge_type="dist",
                      Node_Weight=1):
    """Align one template to the query in ``args.q`` via ``ADMM_algorithm``.

    Args:
        tplname: Template file name, resolved relative to ``args.t``.
        observations: Node scores; rewritten by ``sequence.ModifyObs`` and,
            when extra alignments are supplied, ``sequence.add_observation``.
        transitions: Transition scores passed to ``alignment_init`` and
            ``ADMM_algorithm``.
        pair_dis: Pairwise potential forwarded to ``ADMM_algorithm``.
        disc_method: Forwarded to ``template_search_space`` and
            ``ADMM_algorithm``.
        iteration: Value handed to ``sequence.set_iter``.
        edge_type: Forwarded unchanged; defaults to ``"dist"``.
        Node_Weight: Forwarded to ``ModifyObs`` and ``ADMM_algorithm``.

    Returns:
        ``[template name, target name, alignment_output, output]``.
    """
    # The query is loaded first, then the template.
    query_loader = (load_hhm if args.q.endswith(('.hhm', '.hhm.pkl'))
                    else load_tgt)
    tgt = query_loader(args.q)
    tpl = load_tpl(os.path.join(args.t, tplname))

    sequence = Batchpair(tpl['name'], tgt['name'],
                         tpl['sequence'], tgt['sequence'], tpl, len(args.m))
    sequence.set_iter(iteration)

    # Template Cb-Cb distances: use the stored matrix if present, otherwise
    # compute it from the template.
    raw_dist = (tpl['atomDistMatrix']['CbCb'] if 'atomDistMatrix' in tpl
                else Compute_CbCb_distance_matrix(tpl))
    dist = torch.from_numpy(raw_dist).float()
    negative = torch.lt(dist, 0)
    large = torch.ones(dist.size()) * 10000
    # Negative entries are replaced by the large constant.
    dist = torch.where(negative, large, dist)
    sequence.set_dismatrix(dist)

    observations = sequence.ModifyObs(observations, Node_Weight)
    initial = sequence.alignment_init(observations, transitions, args.a)
    sequence.set_alignment(initial)

    # Optional user-supplied starting alignments.
    if args.extra != []:
        sequence.add_init_alignment(args.extra)
        observations = sequence.add_observation(observations)

    sequence.set_searchspace(
        sequence.template_search_space(disc_method, edge_type))

    _, output = sequence.ADMM_algorithm(
        observations, transitions, pair_dis, disc_method, edge_type,
        Node_Weight)
    sequence.set_output(output)

    return [tpl['name'], tgt['name'], sequence.get_alignment_output(), output]
# Validate and load the template (.tpl) given by args.t.
if not os.path.exists(args.t):
    print("the template is not exist")
    sys.exit(-1)
tpl = load_tpl(args.t)

# Validate and load the query sequence given by args.q.
if not os.path.exists(args.q):
    print("the query sequence is not exist")
    sys.exit(-1)
# NOTE(review): this test uses the suffix 'hhm' (no leading dot), whereas
# the alignment functions in this file test for '.hhm' -- confirm which
# one is intended.
if args.q.endswith(('hhm', '.hhm.pkl')):
    # HHM input is rejected when any SS3 / SS8 / ACC feature mode is set.
    if (any(SS3FeatureModes) or any(SS8FeatureModes)
            or any(ACCFeatureModes)):
        print("Please use TGT format file as input or use model "
              "not using structure information")
        sys.exit(-1)
    tgt = load_hhm(args.q)
else:
    tgt = load_tgt(args.q)

# The pairwise (distance) potential file must exist before it is loaded.
if not os.path.exists(args.d):
    print("the distance potential %s is not exist" % args.d)
    sys.exit(-1)

# Load the distance potential and move both tensors into shared memory.
pair_dis, disc_method, edge_type = Load_EdgeScore(args.d, tgt)
pair_dis = torch.from_numpy(pair_dis)
pair_distance = pair_dis.detach().share_memory_()
disc_method = torch.Tensor(disc_method).detach().share_memory_()

print("finish load data in %.2f s.." % (time.time() - start))