def main():
    """Print virus/pattern pairs whose coverage reaches the given cutoff.

    Command-line arguments (validated through utils_scripting.checkStart):
        1. virus annotation file
        2. % MSA cutoff, parsed as a float

    For every virus key and pattern returned by getCounts, the coverage is
    100 * len(entries stored for the pattern) / len(entries stored for the
    virus).  Pairs at or above the cutoff are written to stdout as a
    tab-separated record, and the percentage is written to stderr.
    """
    arg_names = ['virus annotation file', '% MSA cutoff']
    arg_examples = ['../../Data/ProfileScan/hiv.prosite', '90']
    utils_scripting.checkStart(sys.argv, arg_names, arg_examples,
                               len(arg_names), True)

    annotation_path = sys.argv[1]
    cutoff = float(sys.argv[2])

    virus2annotation, virus2proteinCount = getCounts(annotation_path)

    for virus in virus2annotation:
        for pattern in virus2annotation[virus]:
            hit_count = len(virus2annotation[virus][pattern])
            protein_total = len(virus2proteinCount[virus])
            # The float literal keeps this a true division on Python 2.
            percent = 100.0 * hit_count / protein_total
            if percent >= cutoff:
                record = virus + '\t0\t0\t' + pattern + '\tseq\tELM'
                # A single parenthesised argument prints identically
                # whether print is a statement or a function.
                print(record)
                # NOTE(review): the collapsed source does not show whether
                # this diagnostic belongs inside the cutoff branch; it is
                # kept next to the print -- confirm against the original.
                sys.stderr.write(virus + '\t' + pattern + '\t'
                                 + str(percent) + '\n')
def main():
    """Print virus/motif pairs whose coverage reaches the given cutoff.

    Command-line arguments (validated through utils_scripting.checkStart):
        1. virus annotation file
        2. annotation tool (also used as the last output column)
        3. % MSA cutoff, parsed as a float

    The annotation file is loaded with utils_motif.protein2annotation,
    restricted to the requested tool, and handed to getCounts.  For every
    virus key and motif, the coverage is 100 * (count stored for the motif)
    / (count stored for the virus).  Pairs at or above the cutoff are
    written to stdout as a tab-separated record, and the percentage is
    written to stderr.
    """
    arg_names = ['virus annotation file', 'annotation tool', '% MSA cutoff']
    arg_examples = ['../../Data/ProfileScan/hiv.prosite', 'ProfileScan', '90']
    utils_scripting.checkStart(sys.argv, arg_names, arg_examples,
                               len(arg_names), True)

    annotation_path = sys.argv[1]
    tool = sys.argv[2]
    cutoff = float(sys.argv[3])

    protein2annotation = utils_motif.protein2annotation(annotation_path,
                                                        {tool: True})
    virus2annotation, virus2proteinCount = getCounts(protein2annotation)

    for virus in virus2annotation:
        for motif in virus2annotation[virus]:
            motif_count = float(virus2annotation[virus][motif])
            protein_total = float(virus2proteinCount[virus])
            percent = float(100) * motif_count / protein_total
            if percent >= cutoff:
                record = virus + '\t0\t0\t' + motif + '\tseq\t' + tool
                # A single parenthesised argument prints identically
                # whether print is a statement or a function.
                print(record)
                # NOTE(review): the collapsed source does not show whether
                # this diagnostic belongs inside the cutoff branch; it is
                # kept next to the print -- confirm against the original.
                sys.stderr.write(virus + '\t' + motif + '\t'
                                 + str(percent) + '\n')
# NOTE(review): the two loops below are the tail of a routine whose header
# lies outside this view (presumably matchSeq, which the driver further down
# calls with matching argument names).  Their statements are unchanged; only
# the line breaks were restored.  Re-indent them to the depth of the
# enclosing body when merging.
for s in seq:
    tempSeq = tempSeq + s
offset = 0
while match:
    for elm in pattern2elm[elm_pattern]:
        printResult(protein, elm, match, tempSeq, offset)
    # Resume one character past the start of the last hit and track how far
    # the remaining text has been shifted.
    tempSeq = tempSeq[int(match.start())+1:]
    offset += int( match.start() ) + 1
    match = p.search(tempSeq)

# Script driver: read the ELM/pattern table, compile each distinct pattern
# once, then run matchSeq over every record of the FASTA file.
req_args = ['pattern file', 'fasta file']
examples = ['../../Data/ELM/elm2pattern',
            '../../Data/FASTA/Human/hprd.intr.fasta']
utils_scripting.checkStart(sys.argv, req_args, examples, len(req_args), True)
input_pattern_file = sys.argv[1]
fasta_file = sys.argv[2]

pattern2regex = {}
# pattern -> {elm name: True}; several ELM names may share one pattern.
pattern2elm = defaultdict(dict)
with open(input_pattern_file) as f:
    for line in f:
        stripped = line.strip()
        if not stripped:
            # A blank line (e.g. a trailing newline at the end of the file)
            # would otherwise fail the two-field unpack with ValueError.
            continue
        # Each remaining line must hold exactly two tab-separated fields;
        # anything else still raises so malformed files are not hidden.
        elm, pattern = stripped.split('\t')
        pattern2elm[pattern][elm] = True

for pattern in pattern2elm:
    pattern2regex[pattern] = re.compile(pattern)

for protein, seq in utils.fasta_iter(fasta_file):
    matchSeq(protein, seq, pattern2elm, pattern2regex)