--invert Invert selection (leave -n out) [off] -r --remove-nonseq-chars Remove non sequence characters (like *) from sequences [off] ''') # parser p = ap.ArgumentParser() p.add_argument("-in", "--input-file") p.add_argument("-out", "--output-file") p.add_argument("--invert", action="store_true") p.add_argument("-n", "--names", metavar="NAMES") p.add_argument("-r", "--remove-nonseq-chars", action="store_true") args = p.parse_args() # read input file sys.stderr.write("[{0}]\tImporting list of names\n".format(at())) FOFN = open(args.names, "r") NAMES = [] for line in FOFN: NAMES.append(line.rstrip().split()[0].strip(">")) # FOFN.close() NAMES = list(set(NAMES)) sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), len(NAMES))) # parsing input fasta and writing to output k = 0 sys.stderr.write( "[{0}]\tParsing input FASTA and writing to output lines matching name list\n" .format(at()))
else: INPUT = sys.stdin if args.output_file: OUTPUT = open(args.output_file, "w") else: OUTPUT = sys.stdout if args.error_file: ERROR = open(args.error_file, "w") else: ERROR = sys.stderr if args.verbose: ERROR.write("[{0}] Reading input file, computing boundaries ... \n".format( at())) dic = {} for line in INPUT: lst = line.rstrip("\r\n\b").split("\t") scaffold = lst[0] position = int(lst[3]) # try: dic[scaffold] except KeyError: dic[scaffold] = [[position, position]] # k = 0 lastIndex = len(dic[scaffold]) - 1 while ((position > dic[scaffold][k][1] + 1) and (k < lastIndex)):
p.add_argument("-e", "--error-file") args = p.parse_args() if args.output_file: OUTPUT = open(args.output_file, "w") else: OUTPUT = sys.stdout if args.error_file: ERROR = open(args.error_file, "w") else: ERROR = sys.stderr # reading file A if args.verbose: ERROR.write("[{0}] Reading File A\n".format(at())) FileA = {} INPUT = open(args.gff_a, "r") for line in INPUT: lst = line.rstrip("\b\r\n").split("\t") if lst[2] == args.feature: scaffold = str(lst[0]) start = int(lst[3]) end = int(lst[4]) try: if len(FileA[scaffold]) > 0: for region in FileA[scaffold]: if end == region[0] - 1: idx = FileA[scaffold].index(region) FileA[scaffold][idx] = [start, region[1]]
return y # gtfdic function def gtfdic(x): t = x.split("; ") y = {} for z in t: result = re.search('(.*) \"(.*)\"', z) y[str(result.group(1))] = str(result.group(2)) return y ### CREATE NAMES LIST sys.stderr.write("[{0}]\tStoring names ...\n".format(at())) k = 0 INPUT = open(args.names, "r") NAMES = [] for line in INPUT: name = line.rstrip() NAMES.append(name) k += 1 INPUT.close() sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), k)) ### SELECT GENES
else: INPUT = sys.stdin if args.output_file: OUTPUT = open(args.output_file, "w") else: OUTPUT = sys.stdout if args.error_file: ERROR = open(args.error_file, "w") else: ERROR = sys.stderr if args.verbose: ERROR.write("[{0}] Reading input file, computing boundaries ... \n".format(at())) dic = {} for line in INPUT: lst = line.rstrip("\r\n\b").split("\t") scaffold = lst[0] position = int(lst[3]) # try: dic[scaffold] except KeyError: dic[scaffold] = [[position,position]] # k=0 lastIndex = len(dic[scaffold])-1 while ((position > dic[scaffold][k][1]+1) and (k < lastIndex)):
#while True: for vezes in range(5): try: qwertyuiop = 1+1 #r = session.get('https://www.inscricao.marinha.mil.br/marinha/index_concursos.jsp?id_concurso=384') except Exception as e: print('Conexão Rejeitada/Demorada') else: n_titulos = 0 n_titulos_novos = 0 #titulos_encontrados = [c.text for c in r.html.find('u')] titulos_encontrados = teste[vezes].copy() novos = [] inalterados = [] excluidos = [] hora_consulta = at(lt()) for c in range(len(titulos)): for b in range(len(titulos_encontrados)): if titulos[c] == titulos_encontrados[b]: inalterados.append(c) if not c in inalterados: excluidos.append(c) for c in range(len(titulos_encontrados)): for b in range(len(titulos)): if titulos[b] == titulos_encontrados[c]: pass if not c in inalterados and not c in excluidos: novos.append(c)
args = p.parse_args() if args.output_file: OUTPUT = open(args.output_file, "w") else: OUTPUT = sys.stdout if args.error_file: ERROR = open(args.error_file, "w") else: ERROR = sys.stderr # reading file A if args.verbose: ERROR.write("[{0}] Reading File A\n".format(at())) FileA = {} INPUT = open(args.gff_a, "r") for line in INPUT: lst = line.rstrip("\b\r\n").split("\t") if lst[2] == args.feature: scaffold = str(lst[0]) start = int(lst[3]) end = int(lst[4]) try: if len(FileA[scaffold]) > 0: for region in FileA[scaffold]: if end == region[0]-1: idx = FileA[scaffold].index(region) FileA[scaffold][idx] = [start,region[1]]
result = re.search('(.*)=(.*)', z) y[str(result.group(1))] = str(result.group(2)) return y # gtfdic function def gtfdic(x): t = x.split("; ") y = {} for z in t: result = re.search('(.*) \"(.*)\"', z) y[str(result.group(1))] = str(result.group(2)) return y ### CREATE NAMES LIST sys.stderr.write("[{0}]\tStoring names ...\n".format(at())) k=0 INPUT = open(args.names, "r") NAMES = [] for line in INPUT: name = line.rstrip() NAMES.append(name) k+=1 INPUT.close() sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), k)) ### SELECT GENES
-r --remove-nonseq-chars Remove non sequence characters (like *) from sequences [off] ''') # parser p = ap.ArgumentParser() p.add_argument("-in", "--input-file") p.add_argument("-out", "--output-file") p.add_argument("--invert", action="store_true") p.add_argument("-n", "--names", metavar="NAMES") p.add_argument("-r", "--remove-nonseq-chars", action="store_true") args = p.parse_args() # read input file sys.stderr.write("[{0}]\tImporting list of names\n".format(at())) FOFN = open(args.names, "r") NAMES = [] for line in FOFN: NAMES.append(line.rstrip().split()[0].strip(">")) # FOFN.close() NAMES = list(set(NAMES)) sys.stderr.write("[{0}]\tStored {1} names\n".format(at(), len(NAMES))) # parsing input fasta and writing to output k=0 sys.stderr.write("[{0}]\tParsing input FASTA and writing to output lines matching name list\n".format(at())) INPUT = open(args.input_file, "r")
fake_read = fake_read_name + "\n" + fake_read_seq + "\n" return fake_read revcomp = {"A":"T", "C":"G", "T":"A", "G":"C", "N":"N"} def paired_end(record, start_1, end_1, start_2, end_2, k): fake_pair_seq = [str(record.seq)[start_1:end_1], "".join([revcomp[i] for i in str(record.seq)[start_2:end_2]])[::-1]] fake_pair_name = [">" + str(record.id) + "_" + str(k) + "/1", ">" + str(record.id) + "_" + str(k) + "/2"] fake_pair = [fake_pair_name[0] + "\n" + fake_pair_seq[0] + "\n", fake_pair_name[1] + "\n" + fake_pair_seq[1] + "\n"] return fake_pair ### detect number of cpu and define pool ### if args.verbose: if args.threads > 1: ERROR.write("[{0}] Defined {1} threads\n".format(at(), args.threads)) elif args.threads == 1: ERROR.write("[{0}] Defined {1} thread\n".format(at(), args.threads)) if args.verbose: if args.type == "SE": ERROR.write("[{0}] Generating reads of {1} length every {2} positions\n".format(at(), args.read_length, args.step_size)) elif args.type == "PE": ERROR.write("[{0}] Generating paired-end reads of insert size {1} and length {2} every {3} positions\n".format(at(), args.insert_size, args.read_length, args.step_size)) ERROR.write("[{0}] Distance between mates: {1}\n".format(at(), args.insert_size-(2*args.read_length))) ### run processes
fake_pair_name = [ ">" + str(record.id) + "_" + str(k) + "/1", ">" + str(record.id) + "_" + str(k) + "/2" ] fake_pair = [ fake_pair_name[0] + "\n" + fake_pair_seq[0] + "\n", fake_pair_name[1] + "\n" + fake_pair_seq[1] + "\n" ] return fake_pair ### detect number of cpu and define pool ### if args.verbose: if args.threads > 1: ERROR.write("[{0}] Defined {1} threads\n".format(at(), args.threads)) elif args.threads == 1: ERROR.write("[{0}] Defined {1} thread\n".format(at(), args.threads)) if args.verbose: if args.type == "SE": ERROR.write( "[{0}] Generating reads of {1} length every {2} positions\n". format(at(), args.read_length, args.step_size)) elif args.type == "PE": ERROR.write( "[{0}] Generating paired-end reads of insert size {1} and length {2} every {3} positions\n" .format(at(), args.insert_size, args.read_length, args.step_size)) ERROR.write("[{0}] Distance between mates: {1}\n".format( at(), args.insert_size - (2 * args.read_length)))
# gtfdic function
# (needs the `re` module)
def gtfdic(x):
    """Parse a GTF attribute column into a ``{key: value}`` dictionary.

    The column is split on ``"; "`` and each field is read as
    ``key "value"`` (the surrounding double quotes are removed).  In
    addition to well-formed fields this copes with:

    * blank fields, e.g. the empty piece left by a trailing ``"; "``;
    * a trailing ``;`` or stray whitespace around a field;
    * unquoted values such as ``exon_number 1``.

    Args:
        x: Attribute string, e.g. ``'gene_id "g1"; transcript_id "t1";'``.

    Returns:
        dict mapping attribute names to their values as strings.  When a
        key occurs more than once the last occurrence wins.

    Raises:
        ValueError: If a non-blank field carries no value at all.
    """
    y = {}
    for z in x.split("; "):
        field = z.strip().rstrip(";").rstrip()
        if not field:
            # Nothing between two separators (or after the last one).
            continue
        result = re.search(r'(.*) "(.*)"', field)
        if result:
            y[result.group(1)] = result.group(2)
            continue
        # No quoted value: fall back to "key value".
        parts = field.split(None, 1)
        if len(parts) != 2:
            raise ValueError("malformed GTF attribute: {0!r}".format(field))
        y[parts[0]] = parts[1]
    return y


### reading scaffold lengths
if args.verbose:
    sys.stderr.write("[{0}]\tReading scaffold lengths\n".format(at()))
# Tab-separated input: column 1 is the scaffold name, column 2 its length.
# The ``with`` block closes the file even if a line fails to parse.
Scaflen = {}
with open(args.scaffold_lengths, "r") as SCAFS:
    for line in SCAFS:
        if not line.strip():
            # A blank line has no second column; skip it instead of
            # failing with IndexError.
            continue
        lst = line.rstrip().split("\t")
        Scaflen[lst[0]] = int(lst[1])
if args.verbose:
    sys.stderr.write("[{0}]\tStored information on {1} scaffolds\n".format(
        at(), len(Scaflen)))

### READ INPUT FILE