Beispiel #1
0
def is_data_index_sample_file(file_path):
    """
    Decide whether a .sample file is appropriate for copying to ~/tool-data
    when a tool shed repository is being installed into a Galaxy instance.
    """
    # Tabular data index files are the common case; tabular means copyable.
    if is_column_based(file_path):
        return True
    # Run the disqualifying checks in order, stopping at the first match —
    # html, images, binaries and compressed archives must not be copied.
    disqualifiers = (
        checkers.check_html,
        checkers.check_image,
        lambda p: checkers.check_binary(name=p),
        checkers.is_bz2,
        checkers.is_gzip,
        checkers.check_zip,
    )
    for check in disqualifiers:
        if check(file_path):
            return False
    # Nothing objectionable was detected, so default to copying the file.
    return True
Beispiel #2
0
def is_data_index_sample_file(file_path):
    """
    Decide whether a .sample file is appropriate for copying to ~/tool-data
    when a tool shed repository is being installed into a Galaxy instance.
    """
    # Most data index files are tabular; if the file is tabular, copying
    # it is assumed to be safe.
    if is_column_based(file_path):
        return True
    # Otherwise the file is copyable only if it is none of the disallowed
    # content types (the `or` chain short-circuits exactly like the
    # original sequence of if-statements).
    return not (
        checkers.check_html(file_path)
        or checkers.check_image(file_path)
        or checkers.check_binary(name=file_path)
        or checkers.is_bz2(file_path)
        or checkers.is_gzip(file_path)
        or checkers.check_zip(file_path)
    )
Beispiel #3
0
def iszip( file_path ):
    """Report whether the file at *file_path* is a zip archive."""
    result = checkers.check_zip( file_path )
    return result
Beispiel #4
0
                 if not chunk:
                     break
                 os.write( fd, chunk )
             os.close( fd )
             bzipped_file.close()
             # Replace the bzipped file with the decompressed file if it's safe to do so
             if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place:
                 dataset.path = uncompressed
             else:
                 shutil.move( uncompressed, dataset.path )
             os.chmod(dataset.path, 0644)
         dataset.name = dataset.name.rstrip( '.bz2' )
         data_type = 'bz2'
 if not data_type:
     # See if we have a zip archive
     is_zipped = check_zip( dataset.path )
     if is_zipped:
         if link_data_only == 'copy_files':
             CHUNK_SIZE = 2 ** 20  # 1Mb
             uncompressed = None
             uncompressed_name = None
             unzipped = False
             z = zipfile.ZipFile( dataset.path )
             for name in z.namelist():
                 if name.endswith('/'):
                     continue
                 if unzipped:
                     stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                     break
                 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
                 if sys.version_info[:2] >= ( 2, 6 ):
                     break
                 os.write(fd, chunk)
             os.close(fd)
             bzipped_file.close()
             # Replace the bzipped file with the decompressed file if it's safe to do so
             if dataset.type in ('server_dir',
                                 'path_paste') or not in_place:
                 dataset.path = uncompressed
             else:
                 shutil.move(uncompressed, dataset.path)
             os.chmod(dataset.path, 0644)
         dataset.name = dataset.name.rstrip('.bz2')
         data_type = 'bz2'
 if not data_type:
     # See if we have a zip archive
     is_zipped = check_zip(dataset.path)
     if is_zipped:
         if link_data_only == 'copy_files':
             CHUNK_SIZE = 2**20  # 1Mb
             uncompressed = None
             uncompressed_name = None
             unzipped = False
             z = zipfile.ZipFile(dataset.path)
             for name in z.namelist():
                 if name.endswith('/'):
                     continue
                 if unzipped:
                     stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
                     break
                 fd, uncompressed = tempfile.mkstemp(
                     prefix='data_id_%s_upload_zip_' %
Beispiel #6
0
def iszip(file_path):
    """Thin wrapper: delegate zip detection to ``checkers.check_zip``."""
    is_zip_archive = checkers.check_zip(file_path)
    return is_zip_archive
Beispiel #7
0
def _decompress_single_file(stream, archivename):
    # Stream-decompress a single-file archive (gzip/bz2) into
    # decompress_files/, naming the output after *archivename* without
    # its compression extension.
    target = ("decompress_files/" +
              os.path.splitext(os.path.basename(archivename))[0])
    out = open(target, 'wb', 2**20)
    try:
        # Sentinel must be b'': binary read() returns bytes at EOF, so a
        # '' sentinel would loop forever under Python 3 (b'' == '' under
        # Python 2, so this stays compatible).
        for chunk in iter(lambda: stream.read(2**20), b''):
            out.write(chunk)
    finally:
        out.close()
        stream.close()


def main(archive,
         archivename,
         logfile,
         logid,
         workdir,
         merge,
         rm_header=0,
         concat=''):
    """
    Extract *archive* into a ``decompress_files`` directory, then either
    merge every extracted file into *concat* (when ``merge == "true"``)
    or normalize the extracted files' extensions in place.

    :param archive: path to the archive (zip, tar, gzip or bzip2)
    :param archivename: original archive name, used to name the output of
        a single-file (gzip/bz2) decompression
    :param logfile: path of the user-facing log file to write
    :param logid: unused here; kept for interface compatibility
    :param workdir: unused here; kept for interface compatibility
    :param merge: the string "true" enables merging into *concat*
    :param rm_header: number of leading lines to drop from each merged file
    :param concat: path of the merged output (only used when merging)
    """
    # Temporary repository for the extracted files.
    os.mkdir("decompress_files")

    mylog = open(logfile, "w")
    try:
        is_gzipped, is_gzvalid = check_gzip(archive)
        is_bzipped, is_bzvalid = check_bz2(archive)

        # Extract according to the detected archive type.
        if check_zip(archive):
            with zipfile.ZipFile(archive, 'r') as myarchive:
                myarchive.extractall("decompress_files")
        elif tarfile.is_tarfile(archive):
            mytarfile = tarfile.TarFile.open(archive)
            try:
                mytarfile.extractall("decompress_files")
            finally:
                mytarfile.close()
        elif is_gzipped and is_gzvalid:
            _decompress_single_file(gzip.open(archive, 'rb'), archivename)
        elif is_bzipped and is_bzvalid:
            _decompress_single_file(bz2.BZ2File(archive, 'rb'), archivename)

        if merge == "true":
            mylog.write("Merge option is enabled with " + str(rm_header) +
                        " lines to deleted\n\n")
            myfinalfile = open(concat, "w")
            try:
                for myfile in listdirectory("decompress_files"):
                    mylog.write(
                        os.path.basename(myfile) +
                        " is extracted from the archive and is added into the result file\n"
                    )
                    # Bug fix: the input handle was previously leaked.
                    myopenfile = open(myfile, "r")
                    try:
                        nblinesremove = 0
                        for line in myopenfile:
                            # Drop the first rm_header lines of each file.
                            if int(rm_header) != nblinesremove:
                                nblinesremove = nblinesremove + 1
                            else:
                                myfinalfile.write(line)
                    finally:
                        myopenfile.close()
            finally:
                myfinalfile.close()
            shutil.rmtree("decompress_files")
        else:
            # Merge is disabled: keep each file, fixing up extensions.
            mylog.write("Merge option is disabled\n\n")
            for myfile in listdirectory("decompress_files"):
                # Escaped name is used only for the log message
                # ("\\ " is the same string as the old "\ " literal,
                # without the invalid-escape warning).
                myfileclean = myfile.replace(" ", "\\ ")
                mylog.write(
                    os.path.basename(myfileclean) +
                    " is extracted from the archive \n")

                fileext = os.path.splitext(myfile)[1].replace(".", "")

                # No extension: make it .txt so Galaxy can sniff it.
                if fileext == '':
                    shutil.move(os.path.abspath(myfile),
                                os.path.abspath(myfile) + ".txt")

                if fileext == 'fa':
                    shutil.move(os.path.abspath(myfile),
                                os.path.abspath(myfile).replace(".fa", ".fasta"))

                if fileext == 'fq':
                    shutil.move(os.path.abspath(myfile),
                                os.path.abspath(myfile).replace(".fq", ".fastq"))

            mylog.write(
                "\nPlease refresh your history if all files are not present\n")
    finally:
        # Bug fix: the log was left open when merge == "true" (close()
        # was only reached on the else branch).
        mylog.close()