Example #1
0
    def do_slow_split( cls, input_datasets, subdir_generator_function, split_params):
        # count the sequences so we can split
        # TODO: if metadata is present, take the number of lines / 4
        if input_datasets[0].metadata is not None and input_datasets[0].metadata.sequences is not None:
            total_sequences = input_datasets[0].metadata.sequences
        else:
            input_file = input_datasets[0].file_name
            compress = is_gzip(input_file)
            if compress:
                # gzip is really slow before python 2.7!
                in_file = gzip.GzipFile(input_file, 'r')
            else:
                # TODO
                # if a file is not compressed, seek locations can be calculated and stored
                # ideally, this would be done in metadata
                # TODO
                # Add BufferedReader if python 2.7?
                in_file = open(input_file, 'rt')
            total_sequences = long(0)
            for i, line in enumerate(in_file):
                total_sequences += 1
            in_file.close()
            total_sequences /= 4

        sequences_per_file = cls.get_sequences_per_file(total_sequences, split_params)
        return cls.write_split_files(input_datasets, None, subdir_generator_function, sequences_per_file)
Example #2
0
def get_headers( fname, sep, count=60, is_multi_byte=False ):
    """
    Returns a list with the first 'count' lines split by 'sep'

    >>> fname = get_test_fname('complete.bed')
    >>> get_headers(fname,'\\t')
    [['chr7', '127475281', '127491632', 'NM_000230', '0', '+', '127486022', '127488767', '0', '3', '29,172,3225,', '0,10713,13126,'], ['chr7', '127486011', '127488900', 'D49487', '0', '+', '127486022', '127488767', '0', '2', '155,490,', '0,2399']]
    """
    headers = []
    compressed_gzip = is_gzip(fname)
    compressed_bzip2 = is_bz2(fname)
    try:
        if compressed_gzip:
            in_file = gzip.GzipFile(fname, 'r')
        elif compressed_bzip2:
            in_file = bz2.BZ2File(fname, 'r')
        else:
            in_file = open(fname, 'rt')
        for idx, line in enumerate(in_file):
            line = line.rstrip('\n\r')
            if is_multi_byte:
                # TODO: fix this - sep is never found in line
                line = unicodify( line, 'utf-8' )
                sep = sep.encode( 'utf-8' )
            headers.append( line.split(sep) )
            if idx == count:
                break
    finally:
        in_file.close()
    return headers
Example #3
0
    def sniff( self, filename ):
        """
        Determines whether the file is in ELAND export format

        A file in ELAND export format consists of lines of tab-separated data.
        There is no header.

        Rules for sniffing as True::

            - There must be 22 columns on each line
            - LANE, TILEm X, Y, INDEX, READ_NO, SEQ, QUAL, POSITION, *STRAND, FILT must be correct
            - We will only check that up to the first 5 alignments are correctly formatted.
        """
        try:
            compress = is_gzip(filename)
            if compress:
                fh = gzip.GzipFile(filename, 'r')
            else:
                fh = open( filename )
            count = 0
            while True:
                line = fh.readline()
                line = line.strip()
                if not line:
                    break  # EOF
                if line:
                    linePieces = line.split('\t')
                    if len(linePieces) != 22:
                        return False
                    try:
                        if long(linePieces[1]) < 0:
                            raise Exception('Out of range')
                        if long(linePieces[2]) < 0:
                            raise Exception('Out of range')
                        if long(linePieces[3]) < 0:
                            raise Exception('Out of range')
                        int(linePieces[4])
                        int(linePieces[5])
                        # can get a lot more specific
                    except ValueError:
                        fh.close()
                        return False
                    count += 1
                    if count == 5:
                        break
            if count > 0:
                fh.close()
                return True
        except:
            pass
        fh.close()
        return False
def get_repository_file_contents(app, file_path, repository_id, is_admin=False):
    """Return the display-safe contents of a repository file for display in a browser."""
    safe_str = ''
    if not is_path_browsable(app, file_path, repository_id, is_admin):
        log.warning('Request tries to access a file outside of the repository location. File path: %s', file_path)
        return 'Invalid file path'
    # Symlink targets are checked by is_path_browsable
    if os.path.islink(file_path):
        safe_str = 'link to: ' + basic_util.to_html_string(os.readlink(file_path))
        return safe_str
    elif checkers.is_gzip(file_path):
        return '<br/>gzip compressed file<br/>'
    elif checkers.is_bz2(file_path):
        return '<br/>bz2 compressed file<br/>'
    elif checkers.is_zip(file_path):
        return '<br/>zip compressed file<br/>'
    elif checkers.check_binary(file_path):
        return '<br/>Binary file<br/>'
    else:
        for i, line in enumerate(open(file_path)):
            safe_str = '%s%s' % (safe_str, basic_util.to_html_string(line))
            # Stop reading after string is larger than MAX_CONTENT_SIZE.
            if len(safe_str) > MAX_CONTENT_SIZE:
                large_str = \
                    '<br/>File contents truncated because file size is larger than maximum viewing size of %s<br/>' % \
                    util.nice_size(MAX_CONTENT_SIZE)
                safe_str = '%s%s' % (safe_str, large_str)
                break

        if len(safe_str) > basic_util.MAX_DISPLAY_SIZE:
            # Eliminate the middle of the file to display a file no larger than basic_util.MAX_DISPLAY_SIZE.
            # This may not be ideal if the file is larger than MAX_CONTENT_SIZE.
            join_by_str = \
                "<br/><br/>...some text eliminated here because file size is larger than maximum viewing size of %s...<br/><br/>" % \
                util.nice_size(basic_util.MAX_DISPLAY_SIZE)
            safe_str = util.shrink_string_by_size(safe_str,
                                                  basic_util.MAX_DISPLAY_SIZE,
                                                  join_by=join_by_str,
                                                  left_larger=True,
                                                  beginning_on_size_error=True)
        return safe_str
Example #5
0
    def process_split_file(data):
        """
        This is called in the context of an external process launched by a Task (possibly not on the Galaxy machine)
        to create the input files for the Task. The parameters:
        data - a dict containing the contents of the split file
        """
        args = data['args']
        input_name = data['input_name']
        output_name = data['output_name']
        start_sequence = long(args['start_sequence'])
        sequence_count = long(args['num_sequences'])

        if 'toc_file' in args:
            toc_file = json.load(open(args['toc_file'], 'r'))
            commands = Sequence.get_split_commands_with_toc(input_name, output_name, toc_file, start_sequence, sequence_count)
        else:
            commands = Sequence.get_split_commands_sequential(is_gzip(input_name), input_name, output_name, start_sequence, sequence_count)
        for cmd in commands:
            if 0 != os.system(cmd):
                raise Exception("Executing '%s' failed" % cmd)
        return True
Example #6
0
def looks_like_xml(path, regex=TOOL_REGEX):
    full_path = os.path.abspath(path)

    if not full_path.endswith(".xml"):
        return False

    if not os.path.getsize(full_path):
        return False

    if(checkers.check_binary(full_path) or
       checkers.check_image(full_path) or
       checkers.is_gzip(full_path) or
       checkers.is_bz2(full_path) or
       checkers.is_zip(full_path)):
        return False

    with open(path, "r") as f:
        start_contents = f.read(5 * 1024)
        if regex.search(start_contents):
            return True

    return False
Example #7
0
def main():
    """
    The format of the file is JSON::

        { "sections" : [
                { "start" : "x", "end" : "y", "sequences" : "z" },
                ...
        ]}

    This works only for UNCOMPRESSED fastq files. The Python GzipFile does not provide seekable
    offsets via tell(), so clients just have to split the slow way
    """
    input_fname = sys.argv[1]
    if is_gzip(input_fname):
        print('Conversion is only possible for uncompressed files')
        sys.exit(1)

    out_file = open(sys.argv[2], 'w')

    current_line = 0
    sequences = 1000000
    lines_per_chunk = 4 * sequences
    chunk_begin = 0

    in_file = open(input_fname)

    out_file.write('{"sections" : [')

    for line in in_file:
        current_line += 1
        if 0 == current_line % lines_per_chunk:
            chunk_end = in_file.tell()
            out_file.write('{"start":"%s","end":"%s","sequences":"%s"},' % (chunk_begin, chunk_end, sequences))
            chunk_begin = chunk_end

    chunk_end = in_file.tell()
    out_file.write('{"start":"%s","end":"%s","sequences":"%s"}' % (chunk_begin, chunk_end, (current_line % lines_per_chunk) / 4))
    out_file.write(']}\n')
def looks_like_a_tool_xml(path):
    """Quick check to see if a file looks like it may be a Galaxy XML tool file."""
    full_path = os.path.abspath(path)

    if not full_path.endswith(".xml"):
        return False

    if not os.path.getsize(full_path):
        return False

    if(checkers.check_binary(full_path) or
       checkers.check_image(full_path) or
       checkers.is_gzip(full_path) or
       checkers.is_bz2(full_path) or
       checkers.is_zip(full_path)):
        return False

    with open(path, "r") as f:
        start_contents = f.read(5 * 1024)
        if TOOL_REGEX.search(start_contents):
            return True

    return False
Example #9
0
def is_data_index_sample_file( file_path ):
    """
    Attempt to determine if a .sample file is appropriate for copying to ~/tool-data when
    a tool shed repository is being installed into a Galaxy instance.
    """
    # Currently most data index files are tabular, so check that first.  We'll assume that
    # if the file is tabular, it's ok to copy.
    if is_column_based( file_path ):
        return True
    # If the file is any of the following, don't copy it.
    if checkers.check_html( file_path ):
        return False
    if checkers.check_image( file_path ):
        return False
    if checkers.check_binary( name=file_path ):
        return False
    if checkers.is_bz2( file_path ):
        return False
    if checkers.is_gzip( file_path ):
        return False
    if checkers.check_zip( file_path ):
        return False
    # Default to copying the file if none of the above are true.
    return True
Example #10
0
 def set_meta( self, dataset, overwrite=True, skip=None, max_data_lines=5, **kwd ):
     if dataset.has_data():
         compress = is_gzip(dataset.file_name)
         if compress:
             dataset_fh = gzip.GzipFile(dataset.file_name, 'r')
         else:
             dataset_fh = open( dataset.file_name )
         lanes = {}
         tiles = {}
         barcodes = {}
         reads = {}
         # Should always read the entire file (until we devise a more clever way to pass metadata on)
         # if self.max_optional_metadata_filesize >= 0 and dataset.get_size() > self.max_optional_metadata_filesize:
         # If the dataset is larger than optional_metadata, just count comment lines.
         #     dataset.metadata.data_lines = None
         # else:
         # Otherwise, read the whole thing and set num data lines.
         for i, line in enumerate(dataset_fh):
             if line:
                 linePieces = line.split('\t')
                 if len(linePieces) != 22:
                     raise Exception('%s:%d:Corrupt line!' % (dataset.file_name, i))
                 lanes[linePieces[2]] = 1
                 tiles[linePieces[3]] = 1
                 barcodes[linePieces[6]] = 1
                 reads[linePieces[7]] = 1
             pass
         dataset.metadata.data_lines = i + 1
         dataset_fh.close()
         dataset.metadata.comment_lines = 0
         dataset.metadata.columns = 21
         dataset.metadata.column_types = ['str', 'int', 'int', 'int', 'int', 'int', 'str', 'int', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str', 'str']
         dataset.metadata.lanes = lanes.keys()
         dataset.metadata.tiles = ["%04d" % int(t) for t in tiles.keys()]
         dataset.metadata.barcodes = filter(lambda x: x != '0', barcodes.keys()) + ['NoIndex' for x in barcodes.keys() if x == '0']
         dataset.metadata.reads = reads.keys()
Example #11
0
    def create_changeset_revision( self, trans, id, payload, **kwd ):
        """
        POST /api/repositories/{encoded_repository_id}/changeset_revision

        Create a new tool shed repository commit - leaving PUT on parent
        resource open for updating meta-attributes of the repository (and
        Galaxy doesn't allow PUT multipart data anyway
        https://trello.com/c/CQwmCeG6).

        :param id: the encoded id of the Repository object

        The following parameters may be included in the payload.
        :param commit_message: hg commit message for update.
        """

        # Example URL: http://localhost:9009/api/repositories/f9cad7b01a472135
        rdah = attribute_handlers.RepositoryDependencyAttributeHandler( trans.app, unpopulate=False )
        tdah = attribute_handlers.ToolDependencyAttributeHandler( trans.app, unpopulate=False )

        repository = suc.get_repository_in_tool_shed( trans.app, id )

        if not ( trans.user_is_admin() or
                 trans.app.security_agent.user_can_administer_repository( trans.user, repository ) or
                 trans.app.security_agent.can_push( trans.app, trans.user, repository ) ):
            trans.response.status = 400
            return {
                "err_msg": "You do not have permission to update this repository.",
            }

        repo_dir = repository.repo_path( trans.app )
        repo = hg_util.get_repo_for_repository( trans.app, repository=None, repo_path=repo_dir, create=False )

        upload_point = commit_util.get_upload_point( repository, **kwd )
        tip = repository.tip( trans.app )

        file_data = payload.get('file')
        # Code stolen from gx's upload_common.py
        if isinstance( file_data, FieldStorage ):
            assert not isinstance( file_data.file, StringIO.StringIO )
            assert file_data.file.name != '<fdopen>'
            local_filename = util.mkstemp_ln( file_data.file.name, 'upload_file_data_' )
            file_data.file.close()
            file_data = dict( filename=file_data.filename,
                              local_filename=local_filename )
        elif type( file_data ) == dict and 'local_filename' not in file_data:
            raise Exception( 'Uploaded file was encoded in a way not understood.' )

        commit_message = kwd.get( 'commit_message', 'Uploaded' )

        uploaded_file = open(file_data['local_filename'], 'rb')
        uploaded_file_name = file_data['local_filename']

        isgzip = False
        isbz2 = False
        isgzip = checkers.is_gzip( uploaded_file_name )
        if not isgzip:
            isbz2 = checkers.is_bz2( uploaded_file_name )
        if ( isgzip or isbz2 ):
            # Open for reading with transparent compression.
            tar = tarfile.open( uploaded_file_name, 'r:*' )
        else:
            tar = tarfile.open( uploaded_file_name )

        new_repo_alert = False
        remove_repo_files_not_in_tar = True

        ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
            repository_content_util.upload_tar(
                trans,
                rdah,
                tdah,
                repository,
                tar,
                uploaded_file,
                upload_point,
                remove_repo_files_not_in_tar,
                commit_message,
                new_repo_alert
            )
        if ok:
            # Update the repository files for browsing.
            hg_util.update_repository( repo )
            # Get the new repository tip.
            if tip == repository.tip( trans.app ):
                trans.response.status = 400
                message = 'No changes to repository.'
                ok = False
            else:
                rmm = repository_metadata_manager.RepositoryMetadataManager( app=trans.app,
                                                                             user=trans.user,
                                                                             repository=repository )
                status, error_message = \
                    rmm.set_repository_metadata_due_to_new_tip( trans.request.host,
                                                                content_alert_str=content_alert_str,
                                                                **kwd )
                if error_message:
                    ok = False
                    trans.response.status = 500
                    message = error_message
        else:
            trans.response.status = 500
        if os.path.exists( uploaded_file_name ):
            os.remove( uploaded_file_name )
        if not ok:
            return {
                "err_msg": message,
                "content_alert": content_alert_str
            }
        else:
            return {
                "message": message,
                "content_alert": content_alert_str
            }
Example #12
0
 def upload(self, trans, **kwd):
     message = escape(kwd.get('message', ''))
     status = kwd.get('status', 'done')
     commit_message = escape(kwd.get('commit_message', 'Uploaded'))
     repository_id = kwd.get('repository_id', '')
     repository = repository_util.get_repository_in_tool_shed(trans.app, repository_id)
     repo_dir = repository.repo_path(trans.app)
     repo = hg_util.get_repo_for_repository(trans.app, repository=None, repo_path=repo_dir, create=False)
     uncompress_file = util.string_as_bool(kwd.get('uncompress_file', 'true'))
     remove_repo_files_not_in_tar = util.string_as_bool(kwd.get('remove_repo_files_not_in_tar', 'true'))
     uploaded_file = None
     upload_point = commit_util.get_upload_point(repository, **kwd)
     tip = repository.tip(trans.app)
     file_data = kwd.get('file_data', '')
     url = kwd.get('url', '')
     # Part of the upload process is sending email notification to those that have registered to
     # receive them.  One scenario occurs when the first change set is produced for the repository.
     # See the suc.handle_email_alerts() method for the definition of the scenarios.
     new_repo_alert = repository.is_new(trans.app)
     uploaded_directory = None
     if kwd.get('upload_button', False):
         if file_data == '' and url == '':
             message = 'No files were entered on the upload form.'
             status = 'error'
             uploaded_file = None
         elif url and url.startswith('hg'):
             # Use mercurial clone to fetch repository, contents will then be copied over.
             uploaded_directory = tempfile.mkdtemp()
             repo_url = 'http%s' % url[len('hg'):]
             repo_url = repo_url.encode('ascii', 'replace')
             try:
                 commands.clone(hg_util.get_configured_ui(), repo_url, uploaded_directory)
             except Exception as e:
                 message = 'Error uploading via mercurial clone: %s' % basic_util.to_html_string(str(e))
                 status = 'error'
                 basic_util.remove_dir(uploaded_directory)
                 uploaded_directory = None
         elif url:
             valid_url = True
             try:
                 stream = requests.get(url, stream=True)
             except Exception as e:
                 valid_url = False
                 message = 'Error uploading file via http: %s' % str(e)
                 status = 'error'
                 uploaded_file = None
             if valid_url:
                 fd, uploaded_file_name = tempfile.mkstemp()
                 uploaded_file = open(uploaded_file_name, 'wb')
                 for chunk in stream.iter_content(chunk_size=util.CHUNK_SIZE):
                     if chunk:
                         uploaded_file.write(chunk)
                 uploaded_file.flush()
                 uploaded_file_filename = url.split('/')[-1]
                 isempty = os.path.getsize(os.path.abspath(uploaded_file_name)) == 0
         elif file_data not in ('', None):
             uploaded_file = file_data.file
             uploaded_file_name = uploaded_file.name
             uploaded_file_filename = os.path.split(file_data.filename)[-1]
             isempty = os.path.getsize(os.path.abspath(uploaded_file_name)) == 0
         if uploaded_file or uploaded_directory:
             rdah = attribute_handlers.RepositoryDependencyAttributeHandler(trans.app, unpopulate=False)
             tdah = attribute_handlers.ToolDependencyAttributeHandler(trans.app, unpopulate=False)
             stdtm = ShedToolDataTableManager(trans.app)
             ok = True
             isgzip = False
             isbz2 = False
             if uploaded_file:
                 if uncompress_file:
                     isgzip = checkers.is_gzip(uploaded_file_name)
                     if not isgzip:
                         isbz2 = checkers.is_bz2(uploaded_file_name)
                 if isempty:
                     tar = None
                     istar = False
                 else:
                     # Determine what we have - a single file or an archive
                     try:
                         if (isgzip or isbz2) and uncompress_file:
                             # Open for reading with transparent compression.
                             tar = tarfile.open(uploaded_file_name, 'r:*')
                         else:
                             tar = tarfile.open(uploaded_file_name)
                         istar = True
                     except tarfile.ReadError as e:
                         tar = None
                         istar = False
             else:
                 # Uploaded directory
                 istar = False
             if istar:
                 ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
                     repository_content_util.upload_tar(
                         trans,
                         rdah,
                         tdah,
                         repository,
                         tar,
                         uploaded_file,
                         upload_point,
                         remove_repo_files_not_in_tar,
                         commit_message,
                         new_repo_alert
                     )
             elif uploaded_directory:
                 ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
                     self.upload_directory(trans,
                                           rdah,
                                           tdah,
                                           repository,
                                           uploaded_directory,
                                           upload_point,
                                           remove_repo_files_not_in_tar,
                                           commit_message,
                                           new_repo_alert)
             else:
                 if (isgzip or isbz2) and uncompress_file:
                     uploaded_file_filename = commit_util.uncompress(repository,
                                                                     uploaded_file_name,
                                                                     uploaded_file_filename,
                                                                     isgzip=isgzip,
                                                                     isbz2=isbz2)
                 if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and \
                         uploaded_file_filename != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
                     ok = False
                     message = 'Repositories of type <b>Repository suite definition</b> can only contain a single file named '
                     message += '<b>repository_dependencies.xml</b>.'
                 elif repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and \
                         uploaded_file_filename != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
                     ok = False
                     message = 'Repositories of type <b>Tool dependency definition</b> can only contain a single file named '
                     message += '<b>tool_dependencies.xml</b>.'
                 if ok:
                     if upload_point is not None:
                         full_path = os.path.abspath(os.path.join(repo_dir, upload_point, uploaded_file_filename))
                     else:
                         full_path = os.path.abspath(os.path.join(repo_dir, uploaded_file_filename))
                     # Move some version of the uploaded file to the load_point within the repository hierarchy.
                     if uploaded_file_filename in [rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME]:
                         # Inspect the contents of the file to see if toolshed or changeset_revision attributes
                         # are missing and if so, set them appropriately.
                         altered, root_elem, error_message = rdah.handle_tag_attributes(uploaded_file_name)
                         if error_message:
                             ok = False
                             message = error_message
                             status = 'error'
                         elif altered:
                             tmp_filename = xml_util.create_and_write_tmp_file(root_elem)
                             shutil.move(tmp_filename, full_path)
                         else:
                             shutil.move(uploaded_file_name, full_path)
                     elif uploaded_file_filename in [rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME]:
                         # Inspect the contents of the file to see if changeset_revision values are
                         # missing and if so, set them appropriately.
                         altered, root_elem, error_message = tdah.handle_tag_attributes(uploaded_file_name)
                         if error_message:
                             ok = False
                             message = error_message
                             status = 'error'
                         if ok:
                             if altered:
                                 tmp_filename = xml_util.create_and_write_tmp_file(root_elem)
                                 shutil.move(tmp_filename, full_path)
                             else:
                                 shutil.move(uploaded_file_name, full_path)
                     else:
                         shutil.move(uploaded_file_name, full_path)
                     if ok:
                         # See if any admin users have chosen to receive email alerts when a repository is updated.
                         # If so, check every uploaded file to ensure content is appropriate.
                         check_contents = commit_util.check_file_contents_for_email_alerts(trans.app)
                         if check_contents and os.path.isfile(full_path):
                             content_alert_str = commit_util.check_file_content_for_html_and_images(full_path)
                         else:
                             content_alert_str = ''
                         hg_util.add_changeset(repo.ui, repo, full_path)
                         # Convert from unicode to prevent "TypeError: array item must be char"
                         full_path = full_path.encode('ascii', 'replace')
                         hg_util.commit_changeset(repo.ui,
                                                  repo,
                                                  full_path_to_changeset=full_path,
                                                  username=trans.user.username,
                                                  message=commit_message)
                         if full_path.endswith('tool_data_table_conf.xml.sample'):
                             # Handle the special case where a tool_data_table_conf.xml.sample file is being uploaded
                             # by parsing the file and adding new entries to the in-memory trans.app.tool_data_tables
                             # dictionary.
                             error, error_message = stdtm.handle_sample_tool_data_table_conf_file(full_path, persist=False)
                             if error:
                                 message = '%s<br/>%s' % (message, error_message)
                         # See if the content of the change set was valid.
                         admin_only = len(repository.downloadable_revisions) != 1
                         suc.handle_email_alerts(trans.app,
                                                 trans.request.host,
                                                 repository,
                                                 content_alert_str=content_alert_str,
                                                 new_repo_alert=new_repo_alert,
                                                 admin_only=admin_only)
             if ok:
                 # Update the repository files for browsing.
                 hg_util.update_repository(repo)
                 # Get the new repository tip.
                 if tip == repository.tip(trans.app):
                     message = 'No changes to repository.  '
                     status = 'warning'
                 else:
                     if (isgzip or isbz2) and uncompress_file:
                         uncompress_str = ' uncompressed and '
                     else:
                         uncompress_str = ' '
                     if uploaded_directory:
                         source_type = "repository"
                         source = url
                     else:
                         source_type = "file"
                         source = uploaded_file_filename
                     message = "The %s <b>%s</b> has been successfully%suploaded to the repository.  " % \
                         (source_type, escape(source), uncompress_str)
                     if istar and (undesirable_dirs_removed or undesirable_files_removed):
                         items_removed = undesirable_dirs_removed + undesirable_files_removed
                         message += "  %d undesirable items (.hg .svn .git directories, .DS_Store, hgrc files, etc) " % items_removed
                         message += "were removed from the archive.  "
                     if istar and remove_repo_files_not_in_tar and files_to_remove:
                         if upload_point is not None:
                             message += "  %d files were removed from the repository relative to the selected upload point '%s'.  " % \
                                 (len(files_to_remove), upload_point)
                         else:
                             message += "  %d files were removed from the repository root.  " % len(files_to_remove)
                     rmm = repository_metadata_manager.RepositoryMetadataManager(app=trans.app,
                                                                                 user=trans.user,
                                                                                 repository=repository)
                     status, error_message = \
                         rmm.set_repository_metadata_due_to_new_tip(trans.request.host,
                                                                    content_alert_str=content_alert_str,
                                                                    **kwd)
                     if error_message:
                         message = error_message
                     kwd['message'] = message
                 if repository.metadata_revisions:
                     # A repository's metadata revisions are order descending by update_time, so the zeroth revision
                     # will be the tip just after an upload.
                     metadata_dict = repository.metadata_revisions[0].metadata
                 else:
                     metadata_dict = {}
                 dd = dependency_display.DependencyDisplayer(trans.app)
                 if str(repository.type) not in [rt_util.REPOSITORY_SUITE_DEFINITION,
                                                 rt_util.TOOL_DEPENDENCY_DEFINITION]:
                     change_repository_type_message = rt_util.generate_message_for_repository_type_change(trans.app,
                                                                                                          repository)
                     if change_repository_type_message:
                         message += change_repository_type_message
                         status = 'warning'
                     else:
                         # Provide a warning message if a tool_dependencies.xml file is provided, but tool dependencies
                         # weren't loaded due to a requirement tag mismatch or some other problem.  Tool dependency
                         # definitions can define orphan tool dependencies (no relationship to any tools contained in the
                         # repository), so warning messages are important because orphans are always valid.  The repository
                         # owner must be warned in case they did not intend to define an orphan dependency, but simply
                         # provided incorrect information (tool shed, name owner, changeset_revision) for the definition.
                         orphan_message = dd.generate_message_for_orphan_tool_dependencies(repository, metadata_dict)
                         if orphan_message:
                             message += orphan_message
                             status = 'warning'
                 # Handle messaging for invalid tool dependencies.
                 invalid_tool_dependencies_message = dd.generate_message_for_invalid_tool_dependencies(metadata_dict)
                 if invalid_tool_dependencies_message:
                     message += invalid_tool_dependencies_message
                     status = 'error'
                 # Handle messaging for invalid repository dependencies.
                 invalid_repository_dependencies_message = \
                     dd.generate_message_for_invalid_repository_dependencies(metadata_dict,
                                                                             error_from_tuple=True)
                 if invalid_repository_dependencies_message:
                     message += invalid_repository_dependencies_message
                     status = 'error'
                 # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file.
                 stdtm.reset_tool_data_tables()
                 if uploaded_directory:
                     basic_util.remove_dir(uploaded_directory)
                 trans.response.send_redirect(web.url_for(controller='repository',
                                                          action='browse_repository',
                                                          id=repository_id,
                                                          commit_message='Deleted selected files',
                                                          message=message,
                                                          status=status))
             else:
                 if uploaded_directory:
                     basic_util.remove_dir(uploaded_directory)
                 status = 'error'
             # Reset the tool_data_tables by loading the empty tool_data_table_conf.xml file.
             stdtm.reset_tool_data_tables()
     return trans.fill_template('/webapps/tool_shed/repository/upload.mako',
                                repository=repository,
                                changeset_revision=tip,
                                url=url,
                                commit_message=commit_message,
                                uncompress_file=uncompress_file,
                                remove_repo_files_not_in_tar=remove_repo_files_not_in_tar,
                                message=message,
                                status=status)
Example #13
0
         isempty = os.path.getsize( os.path.abspath( uploaded_file_name ) ) == 0
 elif file_data not in ( '', None ):
     uploaded_file = file_data.file
     uploaded_file_name = uploaded_file.name
     uploaded_file_filename = os.path.split( file_data.filename )[ -1 ]
     isempty = os.path.getsize( os.path.abspath( uploaded_file_name ) ) == 0
 if uploaded_file or uploaded_directory:
     rdah = attribute_handlers.RepositoryDependencyAttributeHandler( trans.app, unpopulate=False )
     tdah = attribute_handlers.ToolDependencyAttributeHandler( trans.app, unpopulate=False )
     tdtm = data_table_manager.ToolDataTableManager( trans.app )
     ok = True
     isgzip = False
     isbz2 = False
     if uploaded_file:
         if uncompress_file:
             isgzip = checkers.is_gzip( uploaded_file_name )
             if not isgzip:
                 isbz2 = checkers.is_bz2( uploaded_file_name )
         if isempty:
             tar = None
             istar = False
         else:
             # Determine what we have - a single file or an archive
             try:
                 if ( isgzip or isbz2 ) and uncompress_file:
                     # Open for reading with transparent compression.
                     tar = tarfile.open( uploaded_file_name, 'r:*' )
                 else:
                     tar = tarfile.open( uploaded_file_name )
                 istar = True
             except tarfile.ReadError, e:
Example #14
0
    def create_changeset_revision(self, trans, id, payload, **kwd):
        """
        POST /api/repositories/{encoded_repository_id}/changeset_revision

        Create a new tool shed repository commit - leaving PUT on parent
        resource open for updating meta-attributes of the repository (and
        Galaxy doesn't allow PUT multipart data anyway
        https://trello.com/c/CQwmCeG6).

        :param id: the encoded id of the Repository object

        The following parameters may be included in the payload.
        :param commit_message: hg commit message for update.
        """

        # Example URL: http://localhost:9009/api/repositories/f9cad7b01a472135
        rdah = attribute_handlers.RepositoryDependencyAttributeHandler(
            trans.app, unpopulate=False)
        tdah = attribute_handlers.ToolDependencyAttributeHandler(
            trans.app, unpopulate=False)

        repository = suc.get_repository_in_tool_shed(trans.app, id)

        if not (trans.user_is_admin()
                or trans.app.security_agent.user_can_administer_repository(
                    trans.user, repository)
                or trans.app.security_agent.can_push(trans.app, trans.user,
                                                     repository)):
            trans.response.status = 400
            return {
                "err_msg":
                "You do not have permission to update this repository.",
            }

        repo_dir = repository.repo_path(trans.app)
        repo = hg_util.get_repo_for_repository(trans.app,
                                               repository=None,
                                               repo_path=repo_dir,
                                               create=False)

        upload_point = commit_util.get_upload_point(repository, **kwd)
        tip = repository.tip(trans.app)

        file_data = payload.get('file')
        # Code stolen from gx's upload_common.py
        if isinstance(file_data, FieldStorage):
            assert not isinstance(file_data.file, StringIO.StringIO)
            assert file_data.file.name != '<fdopen>'
            local_filename = util.mkstemp_ln(file_data.file.name,
                                             'upload_file_data_')
            file_data.file.close()
            file_data = dict(filename=file_data.filename,
                             local_filename=local_filename)
        elif type(file_data) == dict and 'local_filename' not in file_data:
            raise Exception(
                'Uploaded file was encoded in a way not understood.')

        commit_message = kwd.get('commit_message', 'Uploaded')

        uploaded_file = open(file_data['local_filename'], 'rb')
        uploaded_file_name = file_data['local_filename']

        isgzip = False
        isbz2 = False
        isgzip = checkers.is_gzip(uploaded_file_name)
        if not isgzip:
            isbz2 = checkers.is_bz2(uploaded_file_name)
        if (isgzip or isbz2):
            # Open for reading with transparent compression.
            tar = tarfile.open(uploaded_file_name, 'r:*')
        else:
            tar = tarfile.open(uploaded_file_name)

        new_repo_alert = False
        remove_repo_files_not_in_tar = True

        ok, message, files_to_remove, content_alert_str, undesirable_dirs_removed, undesirable_files_removed = \
            repository_content_util.upload_tar(
                trans,
                rdah,
                tdah,
                repository,
                tar,
                uploaded_file,
                upload_point,
                remove_repo_files_not_in_tar,
                commit_message,
                new_repo_alert
            )
        if ok:
            # Update the repository files for browsing.
            hg_util.update_repository(repo)
            # Get the new repository tip.
            if tip == repository.tip(trans.app):
                trans.response.status = 400
                message = 'No changes to repository.'
                ok = False
            else:
                rmm = repository_metadata_manager.RepositoryMetadataManager(
                    app=trans.app, user=trans.user, repository=repository)
                status, error_message = \
                    rmm.set_repository_metadata_due_to_new_tip( trans.request.host,
                                                                content_alert_str=content_alert_str,
                                                                **kwd )
                if error_message:
                    ok = False
                    trans.response.status = 500
                    message = error_message
        else:
            trans.response.status = 500
        if os.path.exists(uploaded_file_name):
            os.remove(uploaded_file_name)
        if not ok:
            return {"err_msg": message, "content_alert": content_alert_str}
        else:
            return {"message": message, "content_alert": content_alert_str}