Exemple #1
0
# Start the results-writing thread: it runs writer() with the write queue,
# the output file and the verbosity flag. Marked as a daemon so that it does
# not keep the interpreter alive once the main thread finishes.
writerThread = Thread(target=writer, args=(writeQueue, outFile, args.verbose))
writerThread.daemon = True
writerThread.start()

# Start a background thread running checkStats which, per the original
# author's note, loops to check the run statistics and print them.
# A thread is used here because (in the author's words) this seems necessary
# for something that watches global variables like linesTested.
worker = Thread(target=checkStats)
worker.daemon = True  # do not block interpreter exit on this monitor thread
worker.start()

########################################################################################################################

# Build the window generator over genoFile, then push every window onto
# windowQueue tagged with its running index (windowsQueued).

windowGenerator = genomics.nonOverlappingSitesWindows(
    genoFile,
    args.windSize,
    names=sampleData.indNames,
    include=scafsToInclude,
    exclude=scafsToExclude,
)


# With args.test set, queuing stops as soon as windowsQueued equals 10;
# otherwise the generator is consumed to the end.
for window in windowGenerator:
    windowQueue.put((windowsQueued, window))
    windowsQueued += 1
    if args.test and windowsQueued == 10:
        break


############################################################################################################################################

# Select the genotype input stream: the file named by args.genoFile
# (opened through gzip when its name ends in ".gz"), or stdin when no file
# name was given.
if args.genoFile:
    # NOTE(review): on Python 3, gzip.open(..., "r") returns a binary stream
    # while open(..., "r") returns text; if this script runs on Python 3 the
    # gzip branch probably wants mode "rt" -- confirm the target version.
    if args.genoFile.endswith(".gz"):
        genoFile = gzip.open(args.genoFile, "r")
    else:
        genoFile = open(args.genoFile, "r")
else:
    # Reading from stdin requires an explicit prefix (presumably because the
    # output names cannot be derived from an input file name -- confirm).
    assert args.prefix is not None, "Please provide a prefix for the output files"
    genoFile = sys.stdin


#########################################################################################
###################### read data ####################################################


# Collect every window yielded from genoFile into a list held in memory.
# windSites=np.inf puts no finite cap on the sites per window; per the
# original note this gives one window per scaffold (the behaviour of
# genomics.nonOverlappingSitesWindows is not visible here).
scafWindows = []
scaffoldReader = genomics.nonOverlappingSitesWindows(genoFile, windSites=np.inf, names=args.samples)
for window in scaffoldReader:
    scafWindows.append(window)
    # Progress report: the running count equals the number of windows stored.
    sys.stderr.write("{} scaffolds read into memory\n".format(len(scafWindows)))

# All input has been consumed, so the stream can be released.
genoFile.close()


# Make one concatenated sequence per sample, spanning all scaffold windows
# in the order they were read.
# Sample names come from the first window; this raises IndexError when no
# windows were read.
names = scafWindows[0].names
# Maps sample name -> flat list that grows as each window is processed.
sampleSeqs = defaultdict(list)
for scafWindow in scafWindows:
    # seqDict() result is indexed by sample name below.
    windowSeqs = scafWindow.seqDict()
    for name in names:
        # The sequences returned by splitSeq are transposed with zip(*...) and
        # then flattened, so their elements are interleaved position by
        # position before being appended to this sample's list.
        # presumably splitSeq yields one sequence per allele/haplotype of the
        # genotype string (hence "pair") -- verify against genomics.splitSeq.
        sampleSeqs[name] += [a for pair in zip(*genomics.splitSeq(windowSeqs[name],genoFormat=args.genoFormat)) for a in pair]