Example #1
0
def download_genome(request):
    """Queue a reference genome download (POST) or list download progress.

    POST: reads ``reference_url`` and ``reference_meta`` (base64-encoded JSON
    holding the ReferenceGenome constructor arguments), saves the new
    ReferenceGenome, starts its download and redirects back to this page.
    A missing or undecodable ``reference_meta`` is logged and skipped
    instead of raising.

    Any other method: renders the download page with the known references
    and the "reference"-tagged DownloadMonitor rows, newest first, each
    annotated with a ``percent_progress`` display string.
    """
    if request.method == "POST":
        url = request.POST.get("reference_url", None)
        reference_meta = request.POST.get("reference_meta", None)
        logger.debug("downloading {0} with meta {1}".format(url, reference_meta))
        # Both fields are required: b64decode(None) would raise, and an
        # empty URL gives the downloader nothing to fetch.
        if url and reference_meta:
            try:
                reference_args = json.loads(base64.b64decode(reference_meta))
            except (TypeError, ValueError):
                # TypeError: bad base64 on Python 2; ValueError: bad base64
                # on Python 3 (binascii.Error) or invalid JSON.
                logger.warning("Ignoring reference download request with malformed reference_meta")
            else:
                # NOTE(review): reference_args is client-supplied and used
                # directly as model kwargs - confirm this is acceptable.
                reference = ReferenceGenome(**reference_args)
                reference.save()
                start_reference_download(url, reference.id)
        return HttpResponseRedirect(urlresolvers.reverse("references_genome_download"))

    references = get_references() or []
    downloads = DownloadMonitor.objects.filter(tags__contains="reference").order_by('-created')
    for download in downloads:
        if download.size:
            download.percent_progress = "{0:.2%}".format(float(download.progress) / download.size)
        else:
            # size unknown (or zero): no ratio can be computed yet
            download.percent_progress = "..."
    ctx = {
        'downloads': downloads,
        'references': references
    }
    return render_to_response("rundb/configure/reference_download.html", ctx,
        context_instance=RequestContext(request))
Example #2
0
def new_reference_genome(reference_args, url=None, reference_file=None, callback_task=None, reference_mask_filename=None):
    """Create a ReferenceGenome record and queue its download or install.

    reference_args -- keyword arguments for the ReferenceGenome constructor;
                      must contain 'short_name'
    url -- when given, the reference is fetched via start_reference_download
    reference_file -- used when no url is given; handed to tasks.install_reference
    callback_task -- forwarded to the download helper, or linked to the
                     install task
    reference_mask_filename -- forwarded to start_reference_download only

    Returns the saved ReferenceGenome with celery_task_id set.
    Raises Exception when a genome with this short name already exists for
    settings.TMAP_VERSION, or when neither url nor reference_file is given.
    """
    # check if the genome already exists
    if ReferenceGenome.objects.filter(short_name=reference_args['short_name'], index_version=settings.TMAP_VERSION):
        raise Exception("Failed - Genome %s already exists" % reference_args['short_name'])

    # Reject a request without any source before touching the database;
    # otherwise a "queued" record with no task behind it would be left over.
    if not url and not reference_file:
        raise Exception('Failed creating new genome reference: No source file')

    reference = ReferenceGenome(**reference_args)
    reference.enabled = False
    reference.status = "queued"
    reference.save()

    if url:
        async_result = start_reference_download(url, reference, callback_task, reference_mask_filename=reference_mask_filename)
    else:
        async_result = tasks.install_reference.apply_async(((reference_file, None), reference.id), link=callback_task)

    # remember which task is processing this reference
    reference.celery_task_id = async_result.task_id
    reference.save()
    return reference
Example #3
0
def new_reference_download(url, reference_args):
    """Save a disabled ReferenceGenome in the "downloading" state and start its download.

    reference_args -- keyword arguments for the ReferenceGenome constructor
    url -- location handed to start_reference_download

    Returns whatever start_reference_download returns.
    """
    genome = ReferenceGenome(**reference_args)
    # stays disabled while the download is in flight
    genome.enabled, genome.status = False, "downloading"
    genome.save()
    download_result = start_reference_download(url, genome)
    return download_result
Example #4
0
def new_genome(request):
    """This is the page to create a new genome.

    POST: validates the submitted form and the uploaded temporary file
    (a plain FASTA or a zip holding exactly one FASTA), creates the
    ReferenceGenome record, moves the upload into its own temporary
    directory and queues the copy/unzip task chained to the TMAP index
    build. Always answers with a JSON object carrying "status" and "error".

    GET: renders the "new genome" modal form.
    """

    if request.method == "POST":
        # parse the data sent in
        #required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', "")
        notes = request.POST.get('notes', "")

        #URL download
        url = request.POST.get('url', False)
        why_delete = ""

        #if any of those were false send back a failed message
        if not all((name, short_name, fasta)):
            return render_to_json({"status": "Form validation failed", "error": True})

        if not set(short_name).issubset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"):
            return render_to_json({"status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.", "error": True})

        # Built only after validation: joining with a missing target_file
        # (False) would raise instead of returning the validation error.
        # NOTE(review): fasta is client-supplied and this path is later
        # removed/renamed - confirm it cannot escape settings.TEMP_PATH.
        reference_path = os.path.join(settings.TEMP_PATH, fasta)

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            #check to ensure the size on the OS the same as the reported.
            reported_file_size = request.POST.get('reported_file_size', False)

            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({"status": "The FASTA temporary files was not found", "error": True})

            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."

            if not fasta.lower().endswith((".fasta", ".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension.  It must be a plain text fasta file or a Zip compressed fasta."

        is_zip = zipfile.is_zipfile(reference_path)
        if is_zip:
            zip_file = zipfile.ZipFile(reference_path, 'r')
            try:
                files = zip_file.namelist()
            finally:
                # release the handle even if the archive listing fails
                zip_file.close()
            # MAC OS zip is being compressed with __MACOSX folder Ex: '__MACOSX/', '__MACOSX/._contigs_2.fasta'.
            # Filter out those files and Upload only FASTA file
            files = [x for x in files if 'MACOSX' not in x]
        else:
            files = [fasta]
        # a real list (not a filter object) so len() and indexing work
        fasta_files = [x for x in files if x.endswith(('.fa', '.fasta'))]

        if len(fasta_files) != 1:
            why_delete = "Error: upload must contain exactly one fasta file"
        else:
            target_fasta_file = fasta_files[0]

        if why_delete:
            # any recorded problem means the upload is discarded
            try:
                os.remove(reference_path)
            except OSError:
                why_delete += " The FASTA file could not be deleted."
            logger.warning("User uploaded bad fasta file: " + str(why_delete))
            return render_to_json({"status": why_delete, "error": True})

        #Make an genome ref object
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            #check to see if the genome already exists in the database with the same version
            return render_to_json({"status": "Failed - Genome with this short name and index version already exist.", "error": True})
        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.notes = notes
        ref_genome.status = "preprocessing"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION
        ref_genome.save()
        logger.debug("Created new reference: %d/%s" % (ref_genome.pk, ref_genome))

        # move the upload into its own world-accessible temp directory
        temp_dir = tempfile.mkdtemp(suffix=short_name, dir=settings.TEMP_PATH)
        temp_upload_path = os.path.join(temp_dir, fasta)
        os.chmod(temp_dir, 0o777)
        os.rename(reference_path, temp_upload_path)
        monitor = FileMonitor(
            local_dir=temp_dir,
            name=fasta
        )
        monitor.save()
        ref_genome.file_monitor = monitor
        ref_genome.reference_path = temp_upload_path
        ref_genome.save()

        # the index build is linked to run after the unzip/copy task
        index_task = tasks.build_tmap_index.subtask((ref_genome.id,), immutable=True)
        if is_zip:
            result = tasks.unzip_reference.apply_async(
                args=(ref_genome.id, target_fasta_file),
                link=index_task
            )
        else:
            result = tasks.copy_reference.apply_async(
                args=(ref_genome.id,),
                link=index_task
            )
        ref_genome.status = "queued"
        ref_genome.celery_task_id = result.task_id
        ref_genome.save()
        return render_to_json({"status": "The genome index is being created.  This might take a while, check the status on the references tab. \
                                You are being redirected there now.", "error": False})

    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response("rundb/configure/modal_references_new_genome.html", context_instance=ctx)
Example #5
0
def search_for_genomes():
    """
    Searches for new genomes.  This will sync the file system and the genomes known by the database.

    Every "tmap*" directory under /results/referenceLibrary is scanned; for
    each entry whose <name>/<name>.info.txt can be read, a ReferenceGenome
    with that short name is created if none exists, or refreshed when the
    newest stored index_version differs from the one in the info file.
    """
    def set_common(dest, genome_dict, ref_dir, lib):
        """Copy name, version, index version and path from genome_dict onto dest.

        If a field is missing or unusable, dest is instead flagged with
        status "missing info.txt" and named after lib. Returns dest.
        """
        try:
            dest.name = genome_dict["genome_name"]
            dest.version = genome_dict["genome_version"]
            dest.index_version = genome_dict["index_version"]
            dest.reference_path = os.path.join(ref_dir, dest.index_version, dest.short_name)
        except (KeyError, TypeError, AttributeError):
            # incomplete or malformed info file; a bare except here would
            # also hide KeyboardInterrupt/SystemExit and unrelated bugs
            dest.name = lib
            dest.status = "missing info.txt"
        return dest

    ref_dir = '/results/referenceLibrary'

    lib_versions = [
        folder for folder in os.listdir(ref_dir)
        if os.path.isdir(os.path.join(ref_dir, folder)) and folder.lower().startswith("tmap")
    ]
    logger.debug("Reference genome scanner found %s" % ",".join(lib_versions))

    for lib_version in lib_versions:
        version_dir = os.path.join(ref_dir, lib_version)
        if not os.path.exists(version_dir):
            # directory disappeared since it was listed
            continue
        for lib in os.listdir(version_dir):
            genome_info_text = os.path.join(version_dir, lib, lib + ".info.txt")
            genome_dict = _read_genome_info(genome_info_text)
            #TODO: we have to take into account the genomes that are queue for creation of in creation
            if not genome_dict:
                continue

            #here we trust that the path the genome is in, is also the short name
            existing_reference = ReferenceGenome.objects.filter(
                short_name=lib).order_by("-index_version")[:1]
            if existing_reference:
                rg = existing_reference[0]
                # only touch the record when the on-disk index version differs
                if rg.index_version != genome_dict["index_version"]:
                    logger.debug("Updating genome status to 'found' for %s id=%d index=%s" % (
                        str(rg), rg.id, rg.index_version))
                    rg.status = "complete"
                    rg = set_common(rg, genome_dict, ref_dir, lib)
                    rg.save()
                continue

            logger.info("Found new genome %s index=%s" % (
                lib, genome_dict["genome_version"]))
            #the reference was not found, add it to the db
            rg = ReferenceGenome()
            rg.short_name = lib
            rg.date = datetime.datetime.now()
            rg.status = "complete"
            rg.enabled = True

            # placeholders, overwritten by set_common when the info is complete
            rg.index_version = ""
            rg.version = ""
            rg.name = ""

            rg = set_common(rg, genome_dict, ref_dir, lib)

            rg.save()
            logger.info("Created new reference genome %s id=%d" % (
                str(rg), rg.id))
Example #6
0
File: genomes.py Project: skner/TS
def new_reference_download(url, reference_args):
    """Save a ReferenceGenome built from reference_args and start its download.

    Returns whatever start_reference_download returns for the given url
    and the newly saved record.
    """
    genome = ReferenceGenome(**reference_args)
    genome.save()
    download_result = start_reference_download(url, genome)
    return download_result
Example #7
0
def new_genome(request):
    """This is the page to create a new genome. The XML-RPC server is ionJobServer.

    POST: validates the form and (unless a URL was supplied) the uploaded
    temporary file, checks that the job server answers, saves a "queued"
    ReferenceGenome and asks the job server to build the TMAP index. The
    record is deleted again if that request fails. Always answers with a
    JSON object carrying "status" and "error".

    GET: renders the "new genome" modal form.
    """

    if request.method == "POST":
        # parse the data sent in

        #required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', False)
        notes = request.POST.get('notes', "")

        #optional
        read_sample_size = request.POST.get('read_sample_size', False)
        read_exclude_length = request.POST.get('read_exclude_length', False)

        #URL download
        url = request.POST.get('url', False)

        why_delete = ""

        #if any of those were false send back a failed message
        if not all((name, short_name, fasta, version)):
            return render_to_json({"status": "Form validation failed", "error": True})

        if not set(short_name).issubset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"):
            return render_to_json({"status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.", "error": True})

        # Built only after validation: concatenating a missing target_file
        # (False) would raise TypeError instead of returning the error above.
        # NOTE(review): fasta is client-supplied and this path may be
        # removed below - confirm it cannot escape the temp directory.
        reference_path = REFERENCE_LIBRARY_TEMP_DIR + fasta

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            #check to ensure the size on the OS the same as the reported.
            reported_file_size = request.POST.get('reported_file_size', False)

            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({"status": "The FASTA temporary files was not found", "error": True})

            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."

            if not fasta.lower().endswith((".fasta", ".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension.  It must be a plain text fasta file or a Zip compressed fasta."

            if why_delete:
                # any recorded problem means the upload is discarded
                try:
                    os.remove(reference_path)
                except OSError:
                    why_delete += " The FASTA file could not be deleted."
                return render_to_json({"status": why_delete, "error": True})

        #Make an genome ref object
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            #check to see if the genome already exists in the database with the same version
            return render_to_json({"status": "Failed - Genome with this short name and index version already exist.", "error": True})
        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.date = datetime.datetime.now()
        ref_genome.notes = notes
        ref_genome.status = "queued"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION

        #before the object is saved we should ping the xml-rpc server to see if it is alive.
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            #just check uptime to make sure the call does not fail
            conn.uptime()
            logger.debug('Connected to ionJobserver process.')
        except (socket.error, xmlrpclib.Fault):
            return render_to_json({"status": "Unable to connect to ionJobserver process.  You may need to restart ionJobserver", "error": True})

        #if the above didn't fail then we can save the object
        #this object must be saved before the tmap call is made
        ref_genome.save()
        logger.debug('Saved ReferenceGenome %s' % ref_genome.__dict__)

        #kick off the anaserve tmap xmlrpc call
        import traceback
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            tmap_bool, tmap_status = conn.tmap(str(ref_genome.id), fasta, short_name, name, version,
                                               read_sample_size, read_exclude_length, settings.TMAP_VERSION)
            logger.debug('ionJobserver process reported %s %s' % (tmap_bool, tmap_status))
        except (socket.error, xmlrpclib.Fault):
            #delete the genome object, because it was not successful
            ref_genome.delete()
            return render_to_json({"status": "Error with index creation", "error": traceback.format_exc()})

        if not tmap_bool:
            # the job server refused the request; tmap_status carries its message
            ref_genome.delete()
            return render_to_json({"status": tmap_status, "error": True})

        return render_to_json({"status": "The genome index is being created.  This might take a while, check the status on the references tab. \
                                You are being redirected there now.", "error": False})

    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response("rundb/configure/modal_references_new_genome.html", context_instance=ctx)
Example #8
0
def new_genome(request):
    """This is the page to create a new genome.

    POST: validates the submitted form and the uploaded temporary file
    (a plain FASTA or a zip holding exactly one FASTA), creates the
    ReferenceGenome record, moves the upload into its own temporary
    directory and queues the copy/unzip task chained to the TMAP index
    build. Always answers with a JSON object carrying "status" and "error".

    GET: renders the "new genome" modal form.
    """

    if request.method == "POST":
        # parse the data sent in
        #required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', "")
        notes = request.POST.get('notes', "")

        #URL download
        url = request.POST.get('url', False)
        why_delete = ""

        #if any of those were false send back a failed message
        if not all((name, short_name, fasta)):
            return render_to_json({"status": "Form validation failed", "error": True})

        if not set(short_name).issubset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"):
            return render_to_json({"status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.", "error": True})

        # Built only after validation: joining with a missing target_file
        # (False) would raise instead of returning the validation error.
        # NOTE(review): fasta is client-supplied and this path is later
        # removed/renamed - confirm it cannot escape settings.TEMP_PATH.
        reference_path = os.path.join(settings.TEMP_PATH, fasta)

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            #check to ensure the size on the OS the same as the reported.
            reported_file_size = request.POST.get('reported_file_size', False)

            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({"status": "The FASTA temporary files was not found", "error": True})

            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."

            if not fasta.lower().endswith((".fasta", ".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension.  It must be a plain text fasta file or a Zip compressed fasta."

        is_zip = zipfile.is_zipfile(reference_path)
        if is_zip:
            zip_file = zipfile.ZipFile(reference_path, 'r')
            try:
                files = zip_file.namelist()
            finally:
                # release the handle even if the archive listing fails
                zip_file.close()
            # Zips made on Mac OS carry '__MACOSX/' resource entries such as
            # '__MACOSX/._contigs_2.fasta'; skip them so they are not
            # counted as a second fasta file.
            files = [x for x in files if 'MACOSX' not in x]
        else:
            files = [fasta]
        # a real list (not a filter object) so len() and indexing work
        fasta_files = [x for x in files if x.endswith(('.fa', '.fasta'))]

        if len(fasta_files) != 1:
            why_delete = "Error: upload must contain exactly one fasta file"
        else:
            target_fasta_file = fasta_files[0]

        if why_delete:
            # any recorded problem means the upload is discarded
            try:
                os.remove(reference_path)
            except OSError:
                why_delete += " The FASTA file could not be deleted."
            logger.warning("User uploaded bad fasta file: " + str(why_delete))
            return render_to_json({"status": why_delete, "error": True})

        #Make an genome ref object
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            #check to see if the genome already exists in the database with the same version
            return render_to_json({"status": "Failed - Genome with this short name and index version already exist.", "error": True})
        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.notes = notes
        ref_genome.status = "preprocessing"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION
        ref_genome.save()
        logger.debug("Created new reference: %d/%s" % (ref_genome.pk, ref_genome))

        # move the upload into its own world-accessible temp directory
        temp_dir = tempfile.mkdtemp(suffix=short_name, dir=settings.TEMP_PATH)
        temp_upload_path = os.path.join(temp_dir, fasta)
        os.chmod(temp_dir, 0o777)
        os.rename(reference_path, temp_upload_path)
        monitor = FileMonitor(
            local_dir=temp_dir,
            name=fasta
        )
        monitor.save()
        ref_genome.file_monitor = monitor
        ref_genome.reference_path = temp_upload_path
        ref_genome.save()

        # the index build is linked to run after the unzip/copy task
        index_task = tasks.build_tmap_index.subtask((ref_genome.id,), immutable=True)
        if is_zip:
            result = tasks.unzip_reference.apply_async(
                args=(ref_genome.id, target_fasta_file),
                link=index_task
            )
        else:
            result = tasks.copy_reference.apply_async(
                args=(ref_genome.id,),
                link=index_task
            )
        ref_genome.status = "queued"
        ref_genome.celery_task_id = result.task_id
        ref_genome.save()
        return render_to_json({"status": "The genome index is being created.  This might take a while, check the status on the references tab. \
                                You are being redirected there now.", "error": False})

    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response("rundb/configure/modal_references_new_genome.html", context_instance=ctx)
Example #9
0
def search_for_genomes():
    """
    Searches for new genomes.  This will sync the file system and the genomes known by the database.

    Every "tmap*" directory under /results/referenceLibrary is scanned; for
    each entry whose <name>/<name>.info.txt can be read, a ReferenceGenome
    with that short name is created if none exists, or refreshed when the
    newest stored index_version differs from the one in the info file.
    """
    def set_common(dest, genome_dict, ref_dir, lib):
        """Copy name, version, index version and path from genome_dict onto dest.

        If a field is missing or unusable, dest is instead flagged with
        status "missing info.txt" and named after lib. Returns dest.
        """
        try:
            dest.name = genome_dict["genome_name"]
            dest.version = genome_dict["genome_version"]
            dest.index_version = genome_dict["index_version"]
            dest.reference_path = os.path.join(ref_dir, dest.index_version, dest.short_name)
        except (KeyError, TypeError, AttributeError):
            # incomplete or malformed info file; a bare except here would
            # also hide KeyboardInterrupt/SystemExit and unrelated bugs
            dest.name = lib
            dest.status = "missing info.txt"
        return dest

    ref_dir = '/results/referenceLibrary'

    # keep only the per-index-version directories (tmap-*)
    lib_versions = []
    for folder in os.listdir(ref_dir):
        is_directory = os.path.isdir(os.path.join(ref_dir, folder))
        if is_directory and folder.lower().startswith("tmap"):
            lib_versions.append(folder)
    logger.debug("Reference genome scanner found %s" % ",".join(lib_versions))

    for lib_version in lib_versions:
        version_dir = os.path.join(ref_dir, lib_version)
        if not os.path.exists(version_dir):
            # directory disappeared since it was listed
            continue
        for lib in os.listdir(version_dir):
            genome_info_text = os.path.join(version_dir, lib, lib + ".info.txt")
            genome_dict = _read_genome_info(genome_info_text)
            #TODO: we have to take into account the genomes that are queue for creation of in creation
            if not genome_dict:
                continue

            #here we trust that the path the genome is in, is also the short name
            existing_reference = ReferenceGenome.objects.filter(
                short_name=lib).order_by("-index_version")[:1]
            if not existing_reference:
                logger.info("Found new genome %s index=%s" % (
                    lib, genome_dict["genome_version"]))
                #the reference was not found, add it to the db
                rg = ReferenceGenome()
                rg.short_name = lib
                rg.date = datetime.datetime.now()
                rg.status = "complete"
                rg.enabled = True

                # placeholders, overwritten by set_common when the info is complete
                rg.index_version = ""
                rg.version = ""
                rg.name = ""

                rg = set_common(rg, genome_dict, ref_dir, lib)

                rg.save()
                logger.info("Created new reference genome %s id=%d" % (
                    str(rg), rg.id))
                continue

            rg = existing_reference[0]
            # only touch the record when the on-disk index version differs
            if rg.index_version != genome_dict["index_version"]:
                logger.debug("Updating genome status to 'found' for %s id=%d index=%s" % (
                    str(rg), rg.id, rg.index_version))
                rg.status = "complete"
                rg = set_common(rg, genome_dict, ref_dir, lib)
                rg.save()