def is_data_index_sample_file(file_path):
    """
    Attempt to determine if a .sample file is appropriate for copying to ~/tool-data when
    a tool shed repository is being installed into a Galaxy instance.
    """
    # Currently most data index files are tabular, so check that first.  We'll assume that
    # if the file is tabular, it's ok to copy.
    if is_column_based(file_path):
        return True
    # If the file is any of the following, don't copy it.
    if checkers.check_html(file_path):
        return False
    if checkers.check_image(file_path):
        return False
    if checkers.check_binary(name=file_path):
        return False
    if checkers.is_bz2(file_path):
        return False
    if checkers.is_gzip(file_path):
        return False
    if checkers.check_zip(file_path):
        return False
    # Default to copying the file if none of the above are true.
    return True
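# --- Hypothetical usage sketch (not from the source) ---------------------------------
# A minimal illustration of how the check above might gate which *.sample files get
# copied into a Galaxy instance's tool-data directory. copy_sample_files(), the repo
# walk, and the destination path are assumptions for illustration only; just
# is_data_index_sample_file() comes from the function above.
import os
import shutil


def copy_sample_files(repo_dir, tool_data_dir):
    for root, _dirs, files in os.walk(repo_dir):
        for name in files:
            if not name.endswith('.sample'):
                continue
            path = os.path.join(root, name)
            if is_data_index_sample_file(path):
                # Copy the .sample file as-is; renaming/overwrite policy is out of scope here.
                shutil.copy(path, os.path.join(tool_data_dir, name))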
def iszip(file_path):
    return checkers.check_zip(file_path)
                    if not chunk:
                        break
                    os.write(fd, chunk)
                os.close(fd)
                bzipped_file.close()
                # Replace the bzipped file with the decompressed file if it's safe to do so
                if dataset.type in ('server_dir', 'path_paste') or not in_place:
                    dataset.path = uncompressed
                else:
                    shutil.move(uncompressed, dataset.path)
                    os.chmod(dataset.path, 0o644)
                dataset.name = dataset.name.rstrip('.bz2')
                data_type = 'bz2'
        if not data_type:
            # See if we have a zip archive
            is_zipped = check_zip(dataset.path)
            if is_zipped:
                if link_data_only == 'copy_files':
                    CHUNK_SIZE = 2 ** 20  # 1Mb
                    uncompressed = None
                    uncompressed_name = None
                    unzipped = False
                    z = zipfile.ZipFile(dataset.path)
                    for name in z.namelist():
                        if name.endswith('/'):
                            continue
                        if unzipped:
                            stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                            break
                        fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_zip_' % dataset.dataset_id,
                                                            dir=os.path.dirname(output_path),
                                                            text=False)
                        if sys.version_info[:2] >= (2, 6):
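# --- Standalone illustration (not from the source) ------------------------------------
# A self-contained sketch of the chunked bz2-decompression pattern the fragment above
# uses: stream the archive into a temp file, then swap it in for the original. The
# function name and paths are hypothetical; error handling is omitted.
import bz2
import os
import shutil
import tempfile

CHUNK_SIZE = 2 ** 20  # 1Mb


def decompress_bz2_in_place(path):
    fd, uncompressed = tempfile.mkstemp(prefix='upload_bz2_',
                                        dir=os.path.dirname(os.path.abspath(path)))
    with bz2.BZ2File(path, 'rb') as bzipped_file:
        while True:
            chunk = bzipped_file.read(CHUNK_SIZE)
            if not chunk:
                break
            os.write(fd, chunk)
    os.close(fd)
    # Replace the compressed file with the decompressed copy.
    shutil.move(uncompressed, path)
    os.chmod(path, 0o644)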
def main(archive, archivename, logfile, logid, workdir, merge, rm_header=0, concat=''):
    # create a temporary repository
    # tmp_dir = tempfile.mkdtemp(dir=os.getcwd())
    os.mkdir("decompress_files")
    # open log file
    mylog = open(logfile, "w")
    is_gzipped, is_gzvalid = check_gzip(archive)
    is_bzipped, is_bzvalid = check_bz2(archive)
    # extract all files into a temp directory
    # test if it is a zip file
    if check_zip(archive):
        with zipfile.ZipFile(archive, 'r') as myarchive:
            myarchive.extractall("decompress_files")
    # test if it is a tar file
    elif tarfile.is_tarfile(archive):
        mytarfile = tarfile.TarFile.open(archive)
        mytarfile.extractall("decompress_files")
        mytarfile.close()
    # test if it is a gzip file
    elif is_gzipped and is_gzvalid:
        mygzfile = gzip.open(archive, 'rb')
        myungzippedfile = open("decompress_files/" + os.path.splitext(os.path.basename(archivename))[0], 'wb', 2 ** 20)
        for i in iter(lambda: mygzfile.read(2 ** 20), b''):
            myungzippedfile.write(i)
        myungzippedfile.close()
        mygzfile.close()
    # test if it is a bzip2 file
    elif is_bzipped and is_bzvalid:
        mybzfile = bz2.BZ2File(archive, 'rb')
        myunbzippedfile = open("decompress_files/" + os.path.splitext(os.path.basename(archivename))[0], 'wb', 2 ** 20)
        for i in iter(lambda: mybzfile.read(2 ** 20), b''):
            myunbzippedfile.write(i)
        myunbzippedfile.close()
        mybzfile.close()

    # test if the merge option is enabled
    if merge == "true":
        mylog.write("Merge option is enabled with " + str(rm_header) + " lines to delete\n\n")
        myfinalfile = open(concat, "w")
        for myfile in listdirectory("decompress_files"):
            myopenfile = open(myfile, "r")
            nblinesremove = 0
            mylog.write(os.path.basename(myfile) + " is extracted from the archive and is added into the result file\n")
            for line in myopenfile:
                # skip the first rm_header lines of each file
                if int(rm_header) != nblinesremove:
                    nblinesremove = nblinesremove + 1
                else:
                    # write the line into the final file
                    myfinalfile.write(line)
            myopenfile.close()
        myfinalfile.close()
        shutil.rmtree("decompress_files")
    else:
        # merge is disabled
        mylog.write("Merge option is disabled\n\n")
        # move all files (recursively) into the working dir
        for myfile in listdirectory("decompress_files"):
            myfileclean = myfile.replace(" ", "\\ ")
            mylog.write(os.path.basename(myfileclean) + " is extracted from the archive\n")
            fileext = os.path.splitext(myfile)[1].replace(".", "")
            # if no extension, add .txt
            if fileext == '':
                shutil.move(os.path.abspath(myfile), os.path.abspath(myfile) + ".txt")
            elif fileext == 'fa':
                shutil.move(os.path.abspath(myfile), os.path.abspath(myfile).replace(".fa", ".fasta"))
            elif fileext == 'fq':
                shutil.move(os.path.abspath(myfile), os.path.abspath(myfile).replace(".fq", ".fastq"))
    mylog.write("\nPlease refresh your history if all files are not present\n")
    mylog.close()
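# --- Hypothetical command-line wiring (not from the source) ---------------------------
# The tool's real wrapper and argument order are not shown above; this sketch simply
# maps positional sys.argv entries onto main()'s signature as an assumed example of
# how the function could be invoked from a Galaxy tool command line.
if __name__ == "__main__":
    import sys
    args = sys.argv[1:]
    # archive archivename logfile logid workdir merge [rm_header] [concat]
    main(args[0], args[1], args[2], args[3], args[4], args[5],
         rm_header=int(args[6]) if len(args) > 6 else 0,
         concat=args[7] if len(args) > 7 else '')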