def main():
    args = parser.parse_args()
    if args.output:
        out_h = NamedTemporaryFile(dir=dirname(abspath(args.output)))
    else:
        out_h = stdout
    blacklist = import_records([args.blacklist])
    records = import_records(args.files)
    (remaining, removals) = bind_filter(records, blacklist)
    remaining = sort_records(remaining)
    removals = sort_records(removals)
    if args.removals:
        for removal in removals:
            (name, type, value) = removal
            print >>stderr, name, type, value
    else:
        for (name, type, value) in remaining:
            print >>out_h, name, type, value
    if args.clean:
        if len(removals) == 0:
            if args.output:
                rename(out_h.name, args.output)
                out_h.delete = False
            exit(0)
        else:
            exit(1)
    else:
        if args.output:
            rename(out_h.name, args.output)
            out_h.delete = False
        exit(0)
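# Editor's note: a minimal, self-contained sketch of the pattern these snippets
# share: write into a NamedTemporaryFile, move it into place, then flip the
# wrapper's `delete` flag so close() does not try to unlink the name that was
# just renamed away. `atomic_write` and `dest` are illustrative names, not from
# the original code, and the `delete` flip relies on tempfile internals (the
# CPython 2 wrapper consults self.delete in close()). Renaming an open file
# works on POSIX; Windows needs the extra steps shown in later snippets.
import os
from os.path import abspath, dirname
from tempfile import NamedTemporaryFile

def atomic_write(dest, data):
    # Stage the data next to the destination so rename() stays on one filesystem.
    tmp = NamedTemporaryFile(dir=dirname(abspath(dest)))
    tmp.write(data)
    tmp.flush()
    os.rename(tmp.name, dest)  # atomic replacement on POSIX
    tmp.delete = False         # close() must not unlink the moved file
    tmp.close()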
def _generate_zip_file(self, files_holder, name_prefix='material', name_suffix=None):
    """Generate a zip file containing the files passed.

    :param files_holder: An iterable (or an iterable containing) object that
                         contains the files to be added in the zip file.
    :param name_prefix: The prefix to the zip file name
    :param name_suffix: The suffix to the zip file name
    :return: The generated zip file.
    """
    temp_file = NamedTemporaryFile(suffix='indico.tmp', dir=config.TEMP_DIR)
    with ZipFile(temp_file.name, 'w', allowZip64=True) as zip_handler:
        self.used_filenames = set()
        for item in self._iter_items(files_holder):
            name = self._prepare_folder_structure(item)
            self.used_filenames.add(name)
            with item.storage.get_local_path(item.storage_file_id) as filepath:
                zip_handler.write(filepath.encode('utf-8'), name)
    temp_file.delete = False
    zip_file_name = '{}-{}.zip'.format(name_prefix, name_suffix) if name_suffix else '{}.zip'.format(name_prefix)
    chmod_umask(temp_file.name)
    return send_file(zip_file_name, temp_file.name, 'application/zip', inline=False)
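# Editor's note: `chmod_umask` is not shown in the snippet; it is presumably a
# helper that re-applies the process umask, since NamedTemporaryFile always
# creates files with mode 0600. A hedged sketch of such a helper (the real
# Indico implementation may differ):
import os

def chmod_umask(path, mode=0666):
    umask = os.umask(0)  # read the current umask...
    os.umask(umask)      # ...and put it straight back
    os.chmod(path, mode & ~umask)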
def create(self):
    """Trigger the creation of a ZIP file containing the site."""
    temp_file = NamedTemporaryFile(suffix='indico.tmp', dir=config.TEMP_DIR)
    self._zip_file = ZipFile(temp_file.name, 'w', allowZip64=True)
    with collect_static_files() as used_assets:
        # create the home page html
        html = self._create_home().encode('utf-8')
        # MathJax plugins can only be known at runtime
        self._copy_folder(os.path.join(self._content_dir, 'static', 'dist', 'js', 'mathjax'),
                          os.path.join(self._static_dir, 'dist', 'js', 'mathjax'))
        # Materials and additional pages
        self._copy_all_material()
        self._create_other_pages()
        # Create index.html file (main page for the event)
        index_path = os.path.join(self._content_dir, 'index.html')
        self._zip_file.writestr(index_path, html)
        self._write_generated_js()
        # Copy static assets to the ZIP file
        self._copy_static_files(used_assets)
        self._copy_plugin_files(used_assets)
        if config.CUSTOMIZATION_DIR:
            self._copy_customization_files(used_assets)
    temp_file.delete = False
    chmod_umask(temp_file.name)
    self._zip_file.close()
    return temp_file.name
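# Editor's note: unlike the send_file() snippets around it, create() returns the
# temp file's *name*; flipping `delete` before returning keeps the file on disk
# and makes the caller responsible for removing it once the ZIP has been used.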
def _generate_zip_file(self, files_holder, name_prefix='material', name_suffix=None):
    """Generate a zip file containing the files passed.

    :param files_holder: An iterable (or an iterable containing) object that
                         contains the files to be added in the zip file.
    :param name_prefix: The prefix to the zip file name
    :param name_suffix: The suffix to the zip file name
    :return: The generated zip file.
    """
    from indico.util.tasks import delete_file
    temp_file = NamedTemporaryFile(suffix='indico.tmp', dir=Config.getInstance().getTempDir())
    with ZipFile(temp_file.name, 'w', allowZip64=True) as zip_handler:
        self.used_filenames = set()
        for item in self._iter_items(files_holder):
            name = self._prepare_folder_structure(item)
            self.used_filenames.add(name)
            with item.storage.get_local_path(item.storage_file_id) as filepath:
                zip_handler.write(filepath.encode('utf-8'), name)
    # Delete the temporary file after some time. Even for a large file we don't
    # need a higher delay since the webserver will keep it open anyway until it's
    # done sending it to the client.
    delete_file.apply_async(args=[temp_file.name], countdown=3600)
    temp_file.delete = False
    zip_file_name = '{}-{}.zip'.format(name_prefix, name_suffix) if name_suffix else '{}.zip'.format(name_prefix)
    return send_file(zip_file_name, temp_file.name, 'application/zip', inline=False)
def write_file(self, filename, data):
    f = NamedTemporaryFile(prefix='hosted-py-tmp', dir=os.getcwd())
    try:
        f.write(data)
    except:
        traceback.print_exc()
        raise
    else:
        # Keep the temp file and atomically move it into place.
        f.delete = False
        f.close()
        os.rename(f.name, filename)
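# Editor's note: creating the temp file with dir=os.getcwd() keeps it on the
# same filesystem as the destination (assuming `filename` lives under the
# working directory), which os.rename() requires; a rename across devices
# fails with OSError (EXDEV).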
def _generate_zip_file(attachments, regform):
    temp_file = NamedTemporaryFile(suffix='indico.tmp', dir=Config.getInstance().getTempDir())
    with ZipFile(temp_file.name, 'w', allowZip64=True) as zip_handler:
        for reg_attachments in attachments.itervalues():
            for reg_attachment in reg_attachments:
                name = _prepare_folder_structure(reg_attachment)
                with reg_attachment.storage.get_local_path(reg_attachment.storage_file_id) as filepath:
                    zip_handler.write(filepath, name)
    # Delete the temporary file after some time. Even for a large file we don't
    # need a higher delay since the webserver will keep it open anyway until it's
    # done sending it to the client.
    delete_file.apply_async(args=[temp_file.name], countdown=3600)
    temp_file.delete = False
    return send_file('attachments-{}.zip'.format(regform.id), temp_file.name, 'application/zip', inline=False)
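# Editor's note: the two snippets above schedule a Celery task `delete_file`
# to reap the temp file an hour later. A minimal sketch of such a task
# (hedged; the real indico.util.tasks implementation may differ):
import os
from celery import shared_task

@shared_task
def delete_file(path):
    try:
        os.unlink(path)  # the file has been sent to the client by now
    except OSError:
        pass             # already gone; nothing to do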
def scratch_cached(self, filename, generator):
    cached = os.path.join(os.environ['SCRATCH'], filename)
    if not os.path.exists(cached):
        f = NamedTemporaryFile(prefix='scratch-cached-tmp', dir=os.environ['SCRATCH'])
        try:
            generator(f)
        except:
            raise
        else:
            f.delete = False
            f.close()
            os.rename(f.name, cached)
    if os.path.exists(filename):
        try:
            os.unlink(filename)
        except:
            pass
    os.symlink(cached, filename)
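# Editor's hedged usage sketch: the `generator` callback receives the open temp
# file and writes the expensive content; the result is cached under $SCRATCH
# and symlinked into place. `cache` is a hypothetical instance of the class
# that defines scratch_cached().
def build(f):
    f.write('expensive result\n')

cache.scratch_cached('results.txt', build)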
    # let outf get auto-deleted if we made no changes
    outf.close()
    if os.name == 'nt':
        os.unlink(outf.name)
else:
    if isinstance(args.inplace, basestring):
        ext = args.inplace
        if os.path.exists(inf.name + ext):
            p.error("can't make backup of %s: %s already exists" % (inf.name, inf.name + ext))
        try:
            os.rename(inf.name, inf.name + ext)
        except OSError as e:
            p.error("can't rename %s to %s: %s" % (inf.name, inf.name + ext, str(e)))
        # don't mark output file with delete=False until all the preceding steps have succeeded
        outf.delete = False
        outf.close()
        shutil.copymode(inf.name + ext, outf.name)
    else:
        outf.delete = False
        outf.close()
        shutil.copymode(inf.name, outf.name)
        # rename won't work on Windows if destination exists
        os.unlink(inf.name)
    os.rename(outf.name, inf.name)
if not args.inplace:
    outf.close()
if not args.no_exit_codes:
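# Editor's note (not from the original tool): on Python 3.3+, os.replace()
# overwrites an existing destination even on Windows, so the unlink-then-rename
# dance above collapses to a single call. A hedged sketch with illustrative names:
import os
import shutil

def install_output(inf_name, outf):
    outf.delete = False               # keep the temp file; it becomes the new inf_name
    outf.close()
    shutil.copymode(inf_name, outf.name)
    os.replace(outf.name, inf_name)   # succeeds even if inf_name still exists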
def main():
    """
    1: Collect Tx from one or more species that are within at least some r value
       of similarity to a provided example Tx or a submitted hypothetical
       expression vector.
    2: Use GTFs, BEDtools, and genome FASTAs to extract the upstream flanking
       sequences into a new FASTA for use in motif discovery.
    """
    desc = """(1) Collect Tx from one or more species that are within at least some r value of
similarity to a provided example Tx or a submitted hypothetical expression vector.
(2) Use GTFs, BEDtools, and genome FASTAs to extract the upstream flanking sequences
into a new FASTA for use in motif discovery."""
    parser = argparse.ArgumentParser(description=desc)
    FileType = argparse.FileType
    logger = logging.getLogger(sys.argv[0].split('/')[-1])

    parser.add_argument('--expn-path', type=str, required=True,
                        help="""Path to expression table file. (default: %(default)s)""")
    parser.add_argument('--tx-name', type=str, required=True,
                        help="""Name of the Tx you want to use as a model. (default: %(default)s)""")
    parser.add_argument('--pearson-filter-type', type=str, default='>=', choices=['>=', '<='],
                        help="""Use >= to find similar expn profiles or <= to find opposite profiles. (default: %(default)s)""")
    parser.add_argument('--pearson-filter-thresh', type=float, default=0.7,
                        help="""Set the threshold of the Pearson r value for the filter. (default: %(default)s)""")
    parser.add_argument('--pval-filter-thresh', type=float, default=0.05,
                        help="""Set the upper threshold for the p-value of the Pearson r values to keep. (default: %(default)s)""")
    parser.add_argument('--tx-name-header', type=str, required=True,
                        help="""The text of the header in the expn table where tx names are stored. (default: %(default)s)""")
    parser.add_argument('--cond-headers', type=str, required=True, nargs='+',
                        help="""A list of the text of the headers in the expn table where the values for each
condition are stored (--cond-headers cond1 cond2 ...). (default: %(default)s)""")
    parser.add_argument('--manual-headers', type=str, required=False, nargs='?',
                        help="""If the expn table does not have headers, provide a list of ordered names for them here. (default: %(default)s)""")
    parser.add_argument('--gtf', type=str, required=True,
                        help="""The path to the gtf file that you want to use for your annotation. (default: %(default)s)""")
    parser.add_argument('--gtf-index', type=str, required=True,
                        help="""The path to the gtf index file generated from "gtf_to_genes". (default: %(default)s)""")
    parser.add_argument('--genome-fastas', type=str, required=True, nargs='+',
                        help="""A list of paths to genomic fasta files or directories where they are stored. (default: %(default)s)""")
    parser.add_argument('--flank-len', type=int, default=2000,
                        help="""The length in bp that should be harvested from the 5' end of the tx. (default: %(default)s)""")
    parser.add_argument('--out-dir', type=str, default='.',
                        help="""A path to a directory where you would like the output files to be stored. (default: %(default)s)""")
    parser.add_argument('--dump-megafasta', action='store_true',
                        help="""Save the concatenated fasta file for debugging. (default: %(default)s)""")
    parser.add_argument('--dump-stats', action='store_true',
                        help="""Print a list of Tx/gene names and the r and p values that passed the filter
and exit without getting fastas. (default: %(default)s)""")

    args = parser.parse_args()

    # tmp files will be stored here
    tmp_files = Bag()

    # 1: Use a correlation filter to pull out any Tx that is sufficiently similar to the model Tx
    vectDict = mangle_expn_vectors(expnPath=args.expn_path, txNameHeader=args.tx_name_header,
                                   condHeaders=args.cond_headers, manualHeaders=args.manual_headers)
    # builds e.g. "lambda x: x >= 0.7" from the CLI choices; an operator-based
    # alternative is sketched after this function
    filterFunc = eval("lambda x: x %s %f" % (args.pearson_filter_type, args.pearson_filter_thresh))
    filterDict = pearsonExpnFilter(modelVector=vectDict[args.tx_name], targetVectors=vectDict,
                                   filterFunc=filterFunc)

    # remove vectors whose r's p-value is not significant (default: <= 0.05)
    sigVectors = {}
    for key in filterDict:
        if key[1] <= args.pval_filter_thresh:
            sigVectors[key] = filterDict[key]
    matchVectors = sigVectors

    ## Impose a distance filter to further refine the gene set,
    ## incorporating magnitudes of the absolute levels of gene expression.
    ## Set the boundaries of acceptable deviation for the target gene mean expression
    ## magnitude by bootstrapping. The metric for comparison will be the average of
    ## the differences of each point in remaining vectors against the target
    ## vector.
    ## 1) calc the metrics for each remaining gene's vector
    ##    PS: numpy rocks.
    ##avgDists = {}
    ##for key in sigVectors:
    ##    avgDist_i = np.mean(np.subtract(vectDict[args.tx_name], sigVectors[key]))
    ##    avgDists[key] = avgDist_i
    ### 2) bootstrap to get a standard error
    ##medianEst, stdErrEst, lo95, hi95 = basic_bootstrap_est(avgDists.values())
    ### 3) recover keys that fall within +/- 1 SE
    ##matchVectors = {}
    ##for key in avgDists:
    ##    avgDist = avgDists[key]
    ##    if (avgDist >= -stdErrEst) and (avgDist <= stdErrEst):
    ##        matchVectors[key] = sigVectors[key]

    # Sort txList so that the highest r values are at the top
    # and save vectors and this info out to file
    txList = sorted(matchVectors.keys(), key=lambda x: x[0], reverse=True)
    sortedTxListFile = NamedTemporaryFile(mode='w+t', prefix='txExpnVectFilteredBy_r.', suffix='.tsv', delete=False)
    for row in txList:
        if args.dump_stats:
            sys.stdout.write('%s\t%s\n' % ('\t'.join(map(str, row)), '\t'.join(map(str, matchVectors[row]))))
        else:
            sortedTxListFile.write('%s\t%s\n' % ('\t'.join(map(str, row)), '\t'.join(map(str, matchVectors[row]))))
    if args.dump_stats:
        sortedTxListFile.close()
        exit(0)
    tmp_files['sortedTxListFile'] = sortedTxListFile
    sortedTxListFile.close()

    g2gObj = gtf_to_genes.get_indexed_genes_matching_gtf_file_name(index_file_name=args.gtf_index,
                                                                   logger=logger,
                                                                   regex_str=args.gtf)[-1]
    txDict = filter_GTF_4_Tx(txList=[x[2] for x in txList], g2gObj=g2gObj)
    tmp_files['txBedFile'] = convert_2_bed(txDict=txDict)

    # 2: Use GTFs, BEDtools, and genome FASTAs to extract the upstream flanking sequences into a new FASTA
    fastaRecLengths, fastaSeqs = fastaRec_length_indexer(fastaFiles=args.genome_fastas)
    tmpFastaRecLengthFile = NamedTemporaryFile(mode='w+b', prefix='tmpFastaRecLengthFile.', suffix='.txt')
    for seqRec in fastaRecLengths:
        tmpFastaRecLengthFile.write("%s\t%s\n" % (seqRec, fastaRecLengths[seqRec]))
    tmpFastaRecLengthFile.flush()

    # concatenate the genome fastas into one temporary mega-fasta
    megaFastaFile = NamedTemporaryFile(mode='w+b', prefix='tmpMegaFastaFile.', suffix='.fas')
    for fasta in fastaSeqs:
        megaFastaFile.write('>%s\n%s\n' % (fasta, fastaSeqs[fasta]))
    megaFastaFile.flush()

    tmp_files['flankBed'] = get_fastas(txBed=tmp_files.txBedFile.name, genomeFasta=megaFastaFile.name,
                                       lenIndex=tmpFastaRecLengthFile.name, lenFlanks=args.flank_len)

    # CLEAN UP: close all tmp_files and move the results to args.out_dir
    mkdirp(args.out_dir)
    for f in tmp_files:
        try:
            tmp_files[f].delete = False
        except AttributeError:
            pass
        try:
            tmp_files[f].close()
        except AttributeError:
            pass

    # ['sortedTxListFile', 'flankBed', 'txBedFile', 'flankFasta']
    sortedTxListFile = "%s/sortedTxList.tsv" % (args.out_dir)
    flankBed = "%s/flankBed.bed" % (args.out_dir)
    txBedFile = "%s/txBed.bed" % (args.out_dir)
    flankFasta = "%s/flankFasta.fas" % (args.out_dir)

    shutil.move(tmp_files.sortedTxListFile.name, sortedTxListFile)
    os.chmod(sortedTxListFile, 0775)
    tmp_files.flankBed.saveas(flankBed)
    os.chmod(flankBed, 0775)
    shutil.move(tmp_files.txBedFile.name, txBedFile)
    os.chmod(txBedFile, 0775)
    shutil.move(tmp_files.flankBed.seqfn, flankFasta)
    os.chmod(flankFasta, 0775)

    if args.dump_megafasta:
        megaFasta = "%s/megaFasta.fas" % (args.out_dir)
        megaFastaFile.delete = False
        megaFastaFile.close()
        shutil.move(megaFastaFile.name, megaFasta)
        os.chmod(megaFasta, 0775)
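# Editor's note: an operator-based alternative to the eval() used above to build
# the Pearson filter; same behavior for the two allowed choices, without
# compiling source code from strings. `make_filter` is an illustrative name.
import operator

_FILTER_OPS = {'>=': operator.ge, '<=': operator.le}

def make_filter(op_name, thresh):
    op = _FILTER_OPS[op_name]  # KeyError on anything outside the CLI choices
    return lambda x: op(x, thresh)

# e.g. filterFunc = make_filter(args.pearson_filter_type, args.pearson_filter_thresh)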