def main():
    """Thin a CSV of scored points so kept points do not share a region.

    Reads ``options.inputFile`` line by line; the first four comma-separated
    fields of each line are parsed as ``rt, mz, amp, score``.  A point is
    kept (written to ``options.outputFile`` and inserted into the spatial
    index) only when the index reports no existing entry for the box
    ``rt +/- rtGap/2`` by ``mz +/- mzGap/2``.  Points are considered in file
    order, so an earlier line always wins over a later one it overlaps.

    The number of kept regions is printed to stdout when the input is
    exhausted.
    """
    options = parser.parse_args()

    if options.logFile:
        # RANK is not assigned in this function; it is expected to exist at
        # module level.
        logging.basicConfig(
            filename=options.logFile + '-' + str(RANK),
            level=logging.DEBUG,
            filemode='w',
            format='%(asctime)s %(message)s',
            datefmt='%m/%d/%Y %H:%M:%S',
        )
    logging.info('program started')
    logging.info('command line: {}'.format(' '.join(sys.argv)))

    # Half-widths of the exclusion box around each kept point.
    Drt = options.rtGap / 2.0
    Dmz = options.mzGap / 2.0
    idx = index.Index()
    logging.info('region: +/- {} sec, +/- {} mz'.format(Drt, Dmz))

    count = 0
    # Both files are managed by ``with`` so the output is flushed and closed
    # even if a malformed line raises part-way through.
    with open(options.outputFile, "w") as data_out, \
            open(options.inputFile, "r") as data_in:
        for datum in data_in:
            if not datum.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Only the first four columns are used; extra columns are ignored.
            rt, mz, amp, score = [float(v) for v in datum.split(",")][:4]
            coord = (rt - Drt, mz - Dmz, rt + Drt, mz + Dmz)
            if idx.count(coord) == 0:
                # ``count`` doubles as the id of the entry in the index.
                idx.insert(count, coord)
                data_out.write(
                    "{}, {}, {}, {}\n".format(rt, mz, amp, score))
                count += 1

    # Parenthesised form prints the same text on Python 2 and Python 3.
    print("found {} regions".format(count))
    logging.debug('main mem: {}'.format(md_io.memory_usage_resource()))
    logging.info('program completed')
def _log_timing_stats(rank, label, samples):
    """Log min/10/25/50/75/90/max percentiles of *samples* for one rank."""
    if not samples:
        # An empty list has no percentiles; a worker that never completed a
        # chunk of work would otherwise hand np.percentile an empty sequence.
        logging.info("rank {}, {} no samples".format(rank, label))
        return
    limits = np.percentile(samples, [0, 10, 25, 50, 75, 90, 100])
    logging.info(
        "rank {}, {} {}".format(
            rank, label, ", ".join("{:.2f}".format(i) for i in limits)
        )
    )


def main(MPI=None):
    """Score spectra sequentially or across an MPI master/worker pool.

    Rank 0 parses the command line, reads windows of spectra, hands them to
    workers and writes the returned results to ``options.outputFile``.  Every
    other rank loops: send the previous result, receive the next window,
    score it.  With no MPI module, or with a single rank, rank 0 does the
    scoring itself.

    Args:
        MPI: a module exposing ``COMM_WORLD``, ``Status`` and ``ANY_SOURCE``
            (the calls used here match mpi4py's ``MPI`` module), or ``None``
            to run sequentially.

    Raises:
        SystemExit: on rank 0 when ``options.format`` is not a known format.
    """
    # try parallel
    COMM = None
    RANK = 0
    SIZE = 1
    if MPI:
        COMM = MPI.COMM_WORLD
        RANK = COMM.Get_rank()
        SIZE = COMM.Get_size()
        status = MPI.Status()

    if RANK == 0:
        options = parser.parse_args()
    else:
        # Placeholder only; replaced by the broadcast below.
        options = []

    if SIZE > 1:
        options = COMM.bcast(options)

    if options.logFile:
        # One log file per rank.
        logging.basicConfig(
            filename=options.logFile + "-" + str(RANK),
            level=logging.DEBUG,
            filemode="w",
            format="%(asctime)s %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
    logging.info("program started")
    logging.info("command line: {}".format(" ".join(sys.argv)))

    if RANK == 0:
        if options.format == "mzml":
            reader = md_io.MZMLtoSpectrum
        elif options.format == "mzdata":
            # TODO: convert to MPI
            reader = md_io.parseMZDATA
        else:
            # NOTE(review): under MPI the workers are left blocked in their
            # send/recv loop when rank 0 exits here - consider aborting the
            # communicator instead.
            sys.exit(
                "Unknown mass spec data format: {}".format(options.format)
            )

    # distribute to worker pool
    # design
    #   master            | workers
    #   ---------           --------------------------------
    #   while workers     | while !end
    #                     |   send result (or None)
    #     recv result     |
    #     store result    |
    #     get sender      |
    #     read data chunk |
    #     send to sender  |
    #                     |   recv data (or None)
    #                     |   do work (or end)
    #
    # main split b/w master worker
    if RANK == 0:
        # read the input data file and extract useful contents
        # 2.355 ~= 2*sqrt(2*ln 2), the Gaussian FWHM-to-sigma factor;
        # presumably rtWidth is a FWHM - TODO confirm.
        half_window = int(
            math.ceil(options.rtSigma * options.rtWidth / 2.355)
        )
        logging.debug("half RT window {}".format(half_window))

        if SIZE > 1:
            # The master counts itself as already "done": the loop below then
            # ends once each of the SIZE - 1 workers has been sent None.
            done = 1
        else:
            done = 0  # only needed for sequential
            raw_data = None  # only needed for sequential

        # ``with`` guarantees the results file is flushed and closed, which
        # the original never did.
        with open(options.outputFile, "w") as data_out:
            if not options.noScore:
                nextWindow = md_io.nextWindow(reader, options, half_window)
            else:
                nextWindow = reader(options)

            while done < SIZE:
                if SIZE > 1:
                    raw_data, scores = COMM.recv(
                        source=MPI.ANY_SOURCE, status=status
                    )
                    source = status.Get_source()

                # In sequential mode this writes the result computed on the
                # previous iteration.
                if raw_data is not None:
                    md_io.writeResults(data_out, raw_data, scores)

                ## Read data chunk
                # Builtin next() works on both Python 2 and 3 iterators;
                # None marks the end of the input.
                spectra = next(nextWindow, None)

                # try removing low values
                if options.removeLow > 0 and spectra is not None:
                    # use the specified low signal
                    spectra = md_filter.removeLowSignal(
                        spectra, options.removeLow
                    )

                if SIZE > 1:
                    # None tells the requesting worker to stop.
                    COMM.send(spectra, dest=source)
                elif spectra is not None:
                    # do work sequentially
                    if not options.noScore:
                        scores = md_filter.scoreSpectra(spectra, options)
                        # The result row is the spectrum at the window centre.
                        raw_data = spectra[len(spectra) // 2]
                    else:
                        scores = None
                        raw_data = spectra

                if spectra is None:
                    done += 1  # can only ever close each worker once
    else:
        # Worker
        scores = None
        raw = None

        # for stats
        send_time = []
        recv_time = []
        work_time = []

        in_mem = md_io.memory_usage_resource()
        while True:
            # Sending a result (initially (None, None)) doubles as the
            # request for the next chunk of work.
            t1 = time.time()
            COMM.send((raw, scores))
            t2 = time.time()
            send_time.append(t2 - t1)

            raw = None
            scores = None
            logging.debug(
                "rank {}, mem start, end: {:.1f} {:.1f}".format(
                    RANK, in_mem, md_io.memory_usage_resource()
                )
            )
            in_mem = md_io.memory_usage_resource()

            t1 = time.time()
            spectra = COMM.recv()
            t2 = time.time()
            recv_time.append(t2 - t1)

            if spectra is None:
                # Master has no more data for this worker.
                break

            ## do work
            try:
                t1 = time.time()
                if not options.noScore:
                    scores = md_filter.scoreSpectra(spectra, options)
                    raw = spectra[len(spectra) // 2]
                else:
                    scores = None
                    raw = spectra
                t2 = time.time()
                work_time.append(t2 - t1)
                spectra = None
            except MemoryError:
                # Best effort: log and carry on; ``raw`` stays None when
                # scoring fails, so the master writes nothing for this window.
                logging.debug("rank {} Memory Error".format(RANK))

        logging.info("rank {}, count {}".format(RANK, len(work_time)))
        logging.info(
            "rank {}, stats min 10% 25% 50% 75% 90% max".format(RANK)
        )
        _log_timing_stats(RANK, "send", send_time)
        _log_timing_stats(RANK, "recv", recv_time)
        _log_timing_stats(RANK, "work", work_time)

    logging.debug("main mem: {}".format(md_io.memory_usage_resource()))
    logging.info("program completed")
def _log_timing_stats(rank, label, samples):
    """Log min/10/25/50/75/90/max percentiles of *samples* for one rank."""
    if not samples:
        # An empty list has no percentiles; a worker that never completed a
        # chunk of work would otherwise hand np.percentile an empty sequence.
        logging.info('rank {}, {} no samples'.format(rank, label))
        return
    limits = np.percentile(samples, [0, 10, 25, 50, 75, 90, 100])
    logging.info('rank {}, {} {}'.format(
        rank, label, ', '.join('{:.2f}'.format(i) for i in limits)))


def main(MPI=None):
    """Score spectra sequentially or across an MPI master/worker pool.

    Rank 0 parses the command line, reads windows of spectra, hands them to
    workers and writes the returned results to ``options.outputFile``.  Every
    other rank loops: send the previous result, receive the next window,
    score it.  With no MPI module, or with a single rank, rank 0 does the
    scoring itself.

    Args:
        MPI: a module exposing ``COMM_WORLD``, ``Status`` and ``ANY_SOURCE``
            (the calls used here match mpi4py's ``MPI`` module), or ``None``
            to run sequentially.

    Raises:
        SystemExit: on rank 0 when ``options.format`` is not a known format.
    """
    # try parallel
    COMM = None
    RANK = 0
    SIZE = 1
    if MPI:
        COMM = MPI.COMM_WORLD
        RANK = COMM.Get_rank()
        SIZE = COMM.Get_size()
        status = MPI.Status()

    if RANK == 0:
        options = parser.parse_args()
    else:
        # Placeholder only; replaced by the broadcast below.
        options = []

    if SIZE > 1:
        options = COMM.bcast(options)

    if options.logFile:
        # One log file per rank.
        logging.basicConfig(filename=options.logFile + '-' + str(RANK),
                            level=logging.DEBUG,
                            filemode='w',
                            format='%(asctime)s %(message)s',
                            datefmt='%m/%d/%Y %H:%M:%S')
    logging.info('program started')
    logging.info('command line: {}'.format(' '.join(sys.argv)))

    if RANK == 0:
        if options.format == 'mzml':
            reader = md_io.MZMLtoSpectrum
        elif options.format == 'mzdata':
            # TODO: convert to MPI
            reader = md_io.parseMZDATA
        else:
            # NOTE(review): under MPI the workers are left blocked in their
            # send/recv loop when rank 0 exits here - consider aborting the
            # communicator instead.
            sys.exit("Unknown mass spec data format: {}".format(
                options.format))

    # distribute to worker pool
    # design
    #   master            | workers
    #   ---------           --------------------------------
    #   while workers     | while !end
    #                     |   send result (or None)
    #     recv result     |
    #     store result    |
    #     get sender      |
    #     read data chunk |
    #     send to sender  |
    #                     |   recv data (or None)
    #                     |   do work (or end)
    #
    # main split b/w master worker
    if RANK == 0:
        # read the input data file and extract useful contents
        # 2.355 ~= 2*sqrt(2*ln 2), the Gaussian FWHM-to-sigma factor;
        # presumably rtWidth is a FWHM - TODO confirm.
        half_window = int(
            math.ceil(options.rtSigma * options.rtWidth / 2.355))
        logging.debug('half RT window {}'.format(half_window))

        if SIZE > 1:
            # The master counts itself as already "done": the loop below then
            # ends once each of the SIZE - 1 workers has been sent None.
            done = 1
        else:
            done = 0  # only needed for sequential
            raw_data = None  # only needed for sequential

        # ``with`` guarantees the results file is flushed and closed, which
        # the original never did.
        with open(options.outputFile, "w") as data_out:
            if not options.noScore:
                nextWindow = md_io.nextWindow(reader, options, half_window)
            else:
                nextWindow = reader(options)

            while done < SIZE:
                if SIZE > 1:
                    raw_data, scores = COMM.recv(source=MPI.ANY_SOURCE,
                                                 status=status)
                    source = status.Get_source()

                # In sequential mode this writes the result computed on the
                # previous iteration.
                if raw_data is not None:
                    md_io.writeResults(data_out, raw_data, scores)

                ## Read data chunk
                # Builtin next() works on both Python 2 and 3 iterators;
                # None marks the end of the input.
                spectra = next(nextWindow, None)

                # try removing low values
                if options.removeLow > 0 and spectra is not None:
                    # use the specified low signal
                    spectra = md_filter.removeLowSignal(spectra,
                                                        options.removeLow)

                if SIZE > 1:
                    # None tells the requesting worker to stop.
                    COMM.send(spectra, dest=source)
                elif spectra is not None:
                    # do work sequentially
                    if not options.noScore:
                        scores = md_filter.scoreSpectra(spectra, options)
                        # The result row is the spectrum at the window centre.
                        raw_data = spectra[len(spectra) // 2]
                    else:
                        scores = None
                        raw_data = spectra

                if spectra is None:
                    done += 1  # can only ever close each worker once
    else:
        # Worker
        scores = None
        raw = None

        # for stats
        send_time = []
        recv_time = []
        work_time = []

        in_mem = md_io.memory_usage_resource()
        while True:
            # Sending a result (initially (None, None)) doubles as the
            # request for the next chunk of work.
            t1 = time.time()
            COMM.send((raw, scores))
            t2 = time.time()
            send_time.append(t2 - t1)

            raw = None
            scores = None
            logging.debug('rank {}, mem start, end: {:.1f} {:.1f}'.format(
                RANK, in_mem, md_io.memory_usage_resource()))
            in_mem = md_io.memory_usage_resource()

            t1 = time.time()
            spectra = COMM.recv()
            t2 = time.time()
            recv_time.append(t2 - t1)

            if spectra is None:
                # Master has no more data for this worker.
                break

            ## do work
            try:
                t1 = time.time()
                if not options.noScore:
                    scores = md_filter.scoreSpectra(spectra, options)
                    raw = spectra[len(spectra) // 2]
                else:
                    scores = None
                    raw = spectra
                t2 = time.time()
                work_time.append(t2 - t1)
                spectra = None
            except MemoryError:
                # Best effort: log and carry on; ``raw`` stays None when
                # scoring fails, so the master writes nothing for this window.
                logging.debug('rank {} Memory Error'.format(RANK))

        logging.info('rank {}, count {}'.format(RANK, len(work_time)))
        logging.info(
            'rank {}, stats min 10% 25% 50% 75% 90% max'.format(RANK))
        _log_timing_stats(RANK, 'send', send_time)
        _log_timing_stats(RANK, 'recv', recv_time)
        _log_timing_stats(RANK, 'work', work_time)

    logging.debug('main mem: {}'.format(md_io.memory_usage_resource()))
    logging.info('program completed')