# check_a_line: report whether a BED record is in an accepted format.
#
# Returns True as soon as is_5col_bed(last_line) is truthy. Otherwise:
#   * when check_3col_bed is truthy and is_3col_bed(last_line) holds, it warns
#     about the 3-column input, calls threetofive(fname), and tells the user to
#     re-run with suffix_5col(fname) as the input BED;
#   * in every other case it logs an error naming fname and prints the
#     offending text (first 50 characters) next to an example of the
#     expected layout.
#
# NOTE(review): the parameter `a_line` is never read; the body inspects
# `last_line` instead. `last_line`, `fname` and `check_3col_bed` are neither
# parameters nor locals, so this is presumably a closure over an enclosing
# function that is not visible here -- confirm whether `a_line` was intended.
# NOTE(review): `check_3col_bed` is tested as a plain value (it is not called).
# NOTE(review): the original indentation is lost, so the nesting level of the
# trailing `return False` cannot be determined from this line alone -- verify
# that every non-5-column path returns False.
def check_a_line(a_line): if is_5col_bed(last_line): return True else: if check_3col_bed: if is_3col_bed(last_line): warn("You input a 3 column bed file like this:\t\t%s" % last_line[:50]) info("[3 Column to 5 Column] %s ==> %s " % (suffix_5col(fname), fname)) threetofive(fname) info( "Use %s instead of %s as the input BED and run the executive Again" % (suffix_5col(fname), fname)) else: error( "The input bed file %s has a wrong format!(3 column checking active)" % fname) print "Wrong Format:\t\t\t%s" % last_line[:50] print "Right Format should look like:\t%s" % ( 'chr1\t567577\t567578\tMACS_peak_1\t119.00') print "Or the depreciate 3-column format like this:\t%s" % ( 'chr1\t567577\t567578') else: error("The input bed file %s has a wrong format!" % fname) print "Wrong Format:\t\t\t%s" % last_line[:50] print "Right Format should look like:\t%s" % ( 'chr1\t567577\t567578\tMACS_peak_1\t119.00') return False
def check_fasta_dna(fname):
    """
    Check if a file has the format of fasta

    Only the first two lines are inspected: the first must start with ">"
    and the second must contain at least one of the characters A, G, C, T
    or N. The file path is printed before the content is examined.

    @type  fname: str
    @param fname: path of the file to be checked
    @rtype:   bool
    @return:  whether the file passed the fasta check
    """
    # 10G = 10 * 1024^3 = 10737418240 bytes
    if not check_common(fname, ".fa", maxsize=10737418240):
        return False
    # Single-argument print(...) behaves the same as the print statement.
    print(fname)

    with open(fname) as fasta_f:
        first_line = fasta_f.readline()
        # startswith() rather than first_line[0]: readline() returns "" for
        # an empty file, and indexing "" would raise IndexError instead of
        # reporting a format problem.
        if not first_line.startswith(">"):
            error("The input fasta file %s has a wrong format!" % fname)
            print("Wrong Format:\t\t\t%s" % first_line[:50])
            print("Right Format should look like:\t%s" % (
                '>chr1:1150372-1150572'))
            return False

        second_line = fasta_f.readline()
        # re.search only requires one matching character somewhere in the
        # line, so this is a loose sanity check, not a strict validation.
        fasta_pattern_scd = "[AGCTN]+"
        if not re.search(fasta_pattern_scd, second_line):
            error("The input fasta file %s has a wrong format!" % fname)
            print("Wrong Format:\t\t\t%s" % second_line[:50])
            print("Right Format should look like:\t%s" % ('NGGGCCATTCA'))
            return False
    return True
def fetch_seq_record(fasta_file, alpha=_alphabet):
    """
    Read every sequence record of a fasta file into a list

    The file is first validated with check_fasta_dna; when validation fails
    an error is logged and the process exits with status 1.

    @type  fasta_file: str
    @param fasta_file: path of the fasta file
    @param alpha: forwarded to SeqIO.parse as its third argument
                  (defaults to the module-level _alphabet)
    @rtype:   list
    @return:  the records produced by SeqIO.parse, in the order it yields them
    """
    fasta_ok = check_fasta_dna(fasta_file)
    if not fasta_ok:
        error("fasta file validation failed")
        sys.exit(1)
    # Materialise the parser's iterator so callers get a real list.
    return list(SeqIO.parse(fasta_file, "fasta", alpha))
def check_cmd(command_):
    """
    Check whether a command can be run in shell

    The command is actually executed through the shell with its output
    discarded; only the exit code is examined.

    @type  command_: str
    @param command_: the command you want to check, for example, "awk"
    @rtype:   bool
    @return:  False when the shell reports exit code 127, True otherwise
    """
    # Send the command's output to os.devnull. The previous stdout=-1 /
    # stderr=-1 is subprocess.PIPE, and since nothing ever reads those pipes
    # a command producing enough output could block on a full pipe buffer.
    # NOTE(review): assumes subcall is subprocess.call -- confirm.
    with open(os.devnull, "w") as devnull:
        exit_code = subcall(command_, shell=True,
                            stdout=devnull, stderr=devnull)
    # Center the banner first, then print it; written so that the result is
    # the same whether print is a statement or a function.
    print(("check command %s" % command_).center(30, "-"))
    if exit_code == 127:  # when command not found, exit_code is 127
        error("No such command as '%s'" % command_)
        return False
    return True
def check_xml(fname):
    """
    Check if a file has the format of xml

    The file must pass check_common, be parseable as XML, and every element
    returned by findall("motif") must carry a non-empty 'id' attribute.

    @type  fname: str
    @param fname: path of the file to be checked
    @rtype:   bool
    @return:  whether the file passed the xml check
    """
    # 10M = 1024 * 1024 * 10 = 10485760 bytes
    if not check_common(fname, ".xml", maxsize=10485760):
        return False

    xmltree = ElementTree()
    try:
        xmltree.parse(fname)
    except Exception:
        # Not a bare "except:", so KeyboardInterrupt and SystemExit still
        # propagate instead of being reported as a malformed XML file.
        error("Fail to parser the xml file.")
        error(
            "The input XML file %s has a wrong format, please check it again."
            % fname)
        return False

    for pos in xmltree.findall("motif"):
        # A motif node without an id means this is not a motif XML file.
        key = pos.get('id')
        if not key:
            error("No 'id' found for node, not a xml for motif information?")
            return False
    return True
def fetch_pssm_xml(xmlfile):
    """
    Load motif information (including each motif's pssm) from an XML file

    The file is validated with check_xml first; on failure an error is
    logged and the process exits with status 1. After parsing, every pssm
    entry among the first four of each position that equals 0.0 is raised
    by 0.00001, and "Found one" is printed for each such replacement.

    @type  xmlfile: str
    @param xmlfile: path of the XML file
    @rtype:   dict
    @return:  motif information (the parser's motifs attribute)
    """
    xml_ok = check_xml(xmlfile)
    if not xml_ok:
        error("xml file validation failed")
        sys.exit(1)

    parser = MP.MotifParser()
    parser.tag_list = ["dbd", "synonym", "description"]
    parser.Parser(xmlfile)

    for motif_id in parser.motifs:
        for column in parser.motifs[motif_id]['pssm']:
            # Visit indexes 0..3 of each position and nudge exact zeros.
            for base in range(4):
                if column[base] == 0.0:
                    column[base] += 0.00001
                    print("Found one")
    return parser.motifs
def check_common(fname, suffix, maxsize=1073741824):
    """
    Check if a file exists, is not larger than maxsize and has the suffix

    @type  fname: str
    @param fname: path of the file to be checked
    @type  suffix: str
    @param suffix: the suffix limit, if not matched, WARNING will appears,
                   but WON'T fail in this check
    @type  maxsize: int
    @param maxsize: the max size in bytes of the file to be checked
                    (default 1GB = 1024^3 = 1073741824 bytes)
    @rtype:   bool
    @return:  whether the file passed the check
    """
    if not os.path.isfile(fname):
        # This helper takes any suffix, so the message must not claim the
        # missing file is a bed file.
        error("No such file: %s" % fname)
        return False
    if os.path.getsize(fname) > maxsize:
        error("The input file %s is larger than maxsize:%d bytes!" %
              (fname, maxsize))
        return False
    if not fname.endswith(suffix):
        # A suffix mismatch is only a warning; the check still passes.
        warn("Your input file %s doesn't have the suffix %s" % (fname, suffix))
    return True