예제 #1
0
def search_for_genomes():
    """
    Sync the ReferenceGenome rows in the database with the indexes on disk.

    Scans ``/results/referenceLibrary`` for sub-directories whose name starts
    with "tmap" (case-insensitive).  Every entry inside such a directory is
    treated as a genome: the entry name is its short name and its metadata is
    read from ``<entry>/<entry>.info.txt`` via ``_read_genome_info``.  Entries
    for which that call returns a falsy value are skipped.

    For a short name that already has rows, the row with the highest
    ``index_version`` is refreshed when its index version differs from the one
    on disk.  For an unknown short name a new, enabled ``ReferenceGenome`` is
    created.  Returns nothing.
    """
    def set_common(dest, genome_dict, ref_dir, lib):
        """Copy the info.txt fields onto ``dest``; flag ``dest`` if that fails."""
        try:
            dest.name = genome_dict["genome_name"]
            dest.version = genome_dict["genome_version"]
            dest.index_version = genome_dict["index_version"]
            dest.reference_path = os.path.join(ref_dir, dest.index_version, dest.short_name)
        except Exception:
            # Best-effort on purpose: a broken info file must not abort the
            # scan.  ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt / SystemExit propagate, and the failure is
            # logged instead of being swallowed silently.
            logger.warning("Could not apply genome info for %s", lib, exc_info=True)
            dest.name = lib
            dest.status = "missing info.txt"
        return dest

    ref_dir = '/results/referenceLibrary'

    lib_versions = [
        folder for folder in os.listdir(ref_dir)
        if os.path.isdir(os.path.join(ref_dir, folder))
        and folder.lower().startswith("tmap")
    ]
    logger.debug("Reference genome scanner found %s", ",".join(lib_versions))

    for lib_version in lib_versions:
        version_dir = os.path.join(ref_dir, lib_version)
        # The directory may have disappeared since it was listed above.
        if not os.path.exists(version_dir):
            continue

        for lib in os.listdir(version_dir):
            genome_info_text = os.path.join(version_dir, lib, lib + ".info.txt")
            genome_dict = _read_genome_info(genome_info_text)
            #TODO: we have to take into account the genomes that are queued for creation or in creation
            if not genome_dict:
                continue

            # Here we trust that the directory the genome is in is also its short name.
            existing_reference = ReferenceGenome.objects.filter(
                short_name=lib).order_by("-index_version")[:1]

            if existing_reference:
                rg = existing_reference[0]
                if rg.index_version != genome_dict["index_version"]:
                    # The message now names the status that is actually assigned below.
                    logger.debug(
                        "Updating genome status to 'complete' for %s id=%d index=%s",
                        str(rg), rg.id, rg.index_version)
                    rg.status = "complete"
                    rg = set_common(rg, genome_dict, ref_dir, lib)
                    rg.save()
                continue

            # The value logged is the genome version, so label it as such.
            logger.info("Found new genome %s version=%s",
                        lib, genome_dict["genome_version"])

            # The reference was not found: add it to the database.
            rg = ReferenceGenome()
            rg.short_name = lib
            rg.date = datetime.datetime.now()
            rg.status = "complete"
            rg.enabled = True

            # Defaults that remain in place if set_common() cannot fill them in.
            rg.index_version = ""
            rg.version = ""
            rg.name = ""

            rg = set_common(rg, genome_dict, ref_dir, lib)

            rg.save()
            logger.info("Created new reference genome %s id=%d", str(rg), rg.id)
예제 #2
0
def new_genome(request):
    """Render the "new genome" modal (GET) or queue a genome index build (POST).

    POST fields read: ``name``, ``short_name``, ``target_file`` and
    ``version`` (all required), plus ``notes``, ``read_sample_size``,
    ``read_exclude_length``, ``url`` and ``reported_file_size``.

    When no ``url`` is given, the uploaded file in the reference-library temp
    directory is checked against the reported size and for a ``.fasta`` /
    ``.zip`` extension; a file failing either check is deleted.  A
    ``ReferenceGenome`` row is then saved with status "queued" and the build
    is started through the ``tmap`` XML-RPC call on the ionJobServer; the row
    is deleted again if that call fails or reports failure.

    Every POST outcome is returned through ``render_to_json`` as a dict with a
    ``status`` message and an ``error`` value.  Request methods other than
    POST and GET fall through and return ``None``.
    """

    if request.method == "POST":
        # parse the data sent in

        # required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', False)
        notes = request.POST.get('notes', "")

        # optional
        read_sample_size = request.POST.get('read_sample_size', False)
        read_exclude_length = request.POST.get('read_exclude_length', False)

        # URL download
        url = request.POST.get('url', False)

        # if any of the required fields is missing send back a failed message
        if not all((name, short_name, fasta, version)):
            return render_to_json({"status": "Form validation failed", "error": True})

        if not set(short_name).issubset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"):
            return render_to_json({"status": "The short name has invalid characters. The valid values are letters, numbers, and underscores.", "error": True})

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            # Built only after validation: concatenating a missing
            # ``target_file`` (False) would raise TypeError instead of
            # producing the validation message above.
            reference_path = REFERENCE_LIBRARY_TEMP_DIR + fasta

            # ``fasta`` is client-supplied and the path may be deleted below,
            # so refuse anything that resolves outside the temp directory
            # (e.g. "../" sequences).
            # NOTE(review): assumes uploads live under
            # REFERENCE_LIBRARY_TEMP_DIR -- confirm against the upload handler.
            temp_root = os.path.join(os.path.abspath(REFERENCE_LIBRARY_TEMP_DIR), "")
            if not os.path.abspath(reference_path).startswith(temp_root):
                return render_to_json({"status": "The FASTA file name is invalid.", "error": True})

            # check to ensure the size on disk is the same as the reported one
            reported_file_size = request.POST.get('reported_file_size', False)

            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({"status": "The FASTA temporary files was not found", "error": True})

            why_delete = ""

            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."

            # A wrong extension takes precedence over a size mismatch.
            if not fasta.lower().endswith((".fasta", ".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension.  It must be a plain text fasta file or a Zip compressed fasta."

            if why_delete:
                try:
                    os.remove(reference_path)
                except OSError:
                    why_delete += " The FASTA file could not be deleted."
                return render_to_json({"status": why_delete, "error": True})

        # Refuse to create a duplicate: same short name and same index version.
        if ReferenceGenome.objects.filter(short_name=short_name, index_version=settings.TMAP_VERSION):
            return render_to_json({"status": "Failed - Genome with this short name and index version already exist.", "error": True})

        # Make a genome ref object
        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.date = datetime.datetime.now()
        ref_genome.notes = notes
        ref_genome.status = "queued"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION

        # before the object is saved, ping the xml-rpc server to see if it is alive
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            # just check uptime to make sure the call does not fail
            conn.uptime()
            logger.debug('Connected to ionJobserver process.')
        except (socket.error, xmlrpclib.Fault):
            return render_to_json({"status": "Unable to connect to ionJobserver process.  You may need to restart ionJobserver", "error": True})

        # if the above didn't fail then we can save the object;
        # it must be saved before the tmap call because its id is passed along
        ref_genome.save()
        logger.debug('Saved ReferenceGenome %s' % ref_genome.__dict__)

        # kick off the tmap xmlrpc call
        import traceback
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            tmap_bool, tmap_status = conn.tmap(str(ref_genome.id), fasta, short_name, name, version,
                                               read_sample_size, read_exclude_length, settings.TMAP_VERSION)
            logger.debug('ionJobserver process reported %s %s' % (tmap_bool, tmap_status))
        except (socket.error, xmlrpclib.Fault):
            # delete the genome object, because it was not successful
            ref_genome.delete()
            return render_to_json({"status": "Error with index creation", "error": traceback.format_exc()})

        if not tmap_bool:
            ref_genome.delete()
            return render_to_json({"status": tmap_status, "error": True})

        # Adjacent literals instead of a backslash continuation, which used to
        # embed the source indentation in the message shown to the user.
        return render_to_json({
            "status": ("The genome index is being created.  This might take a while, "
                       "check the status on the references tab. "
                       "You are being redirected there now."),
            "error": False,
        })

    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response("rundb/configure/modal_references_new_genome.html", context_instance=ctx)
예제 #3
0
파일: genomes.py 프로젝트: vswilliamson/TS
def search_for_genomes():
    """
    Bring the ReferenceGenome table in line with the genome indexes on disk.

    Looks in ``/results/referenceLibrary`` for sub-directories whose name
    starts with "tmap" (case-insensitive).  Each entry in such a directory is
    taken to be a genome whose short name is the entry name; its metadata is
    loaded from ``<entry>/<entry>.info.txt`` with ``_read_genome_info`` and
    the entry is skipped when that call returns a falsy value.

    If rows with that short name exist, the one with the highest
    ``index_version`` is updated when its index version differs from the one
    on disk; otherwise a new, enabled ``ReferenceGenome`` is saved.  Returns
    nothing.
    """
    def set_common(dest, genome_dict, ref_dir, lib):
        """Apply the info.txt fields to ``dest``; mark ``dest`` if that fails."""
        try:
            dest.name = genome_dict["genome_name"]
            dest.version = genome_dict["genome_version"]
            dest.index_version = genome_dict["index_version"]
            dest.reference_path = os.path.join(ref_dir, dest.index_version, dest.short_name)
        except Exception:
            # Intentionally tolerant so one bad info file cannot stop the
            # scan, but catch ``Exception`` rather than everything so that
            # KeyboardInterrupt / SystemExit still propagate, and record the
            # reason instead of discarding it.
            logger.warning("Could not apply genome info for %s", lib, exc_info=True)
            dest.name = lib
            dest.status = "missing info.txt"
        return dest

    ref_dir = '/results/referenceLibrary'

    lib_versions = [
        folder for folder in os.listdir(ref_dir)
        if os.path.isdir(os.path.join(ref_dir, folder))
        and folder.lower().startswith("tmap")
    ]
    logger.debug("Reference genome scanner found %s", ",".join(lib_versions))

    for lib_version in lib_versions:
        version_dir = os.path.join(ref_dir, lib_version)
        # Guard against the directory vanishing after the listing above.
        if not os.path.exists(version_dir):
            continue

        for lib in os.listdir(version_dir):
            genome_info_text = os.path.join(version_dir, lib, lib + ".info.txt")
            genome_dict = _read_genome_info(genome_info_text)
            #TODO: we have to take into account the genomes that are queued for creation or in creation
            if not genome_dict:
                continue

            # We trust that the directory name of the genome is also its short name.
            existing_reference = ReferenceGenome.objects.filter(
                short_name=lib).order_by("-index_version")[:1]

            if existing_reference:
                rg = existing_reference[0]
                if rg.index_version != genome_dict["index_version"]:
                    # Message matches the status assigned on the next line.
                    logger.debug(
                        "Updating genome status to 'complete' for %s id=%d index=%s",
                        str(rg), rg.id, rg.index_version)
                    rg.status = "complete"
                    rg = set_common(rg, genome_dict, ref_dir, lib)
                    rg.save()
                continue

            # The logged value is the genome version; label it accordingly.
            logger.info("Found new genome %s version=%s",
                        lib, genome_dict["genome_version"])

            # No row for this short name yet: create one.
            rg = ReferenceGenome()
            rg.short_name = lib
            rg.date = datetime.datetime.now()
            rg.status = "complete"
            rg.enabled = True

            # Fallback values kept when set_common() cannot populate them.
            rg.index_version = ""
            rg.version = ""
            rg.name = ""

            rg = set_common(rg, genome_dict, ref_dir, lib)

            rg.save()
            logger.info("Created new reference genome %s id=%d", str(rg), rg.id)
예제 #4
0
파일: genomes.py 프로젝트: aidjek/TS
def new_genome(request):
    """Serve the "new genome" modal (GET) or queue a genome index build (POST).

    POST fields read: ``name``, ``short_name``, ``target_file`` and
    ``version`` (all required), plus ``notes``, ``read_exclude_length``,
    ``url`` and ``reported_file_size``.

    Without a ``url``, the uploaded file in the reference-library temp
    directory must match the reported size and carry a ``.fasta`` / ``.zip``
    extension; otherwise it is deleted and an error is returned.  A
    ``ReferenceGenome`` row is then saved with status "queued" and the build
    is started through the ``tmap`` XML-RPC call on the ionJobServer; the row
    is removed again if that call fails or reports failure.

    Every POST outcome is returned through ``render_to_json`` as a dict with a
    ``status`` message and an ``error`` value.  Request methods other than
    POST and GET fall through and return ``None``.
    """

    if request.method == "POST":
        # parse the data sent in

        # required
        name = request.POST.get('name', False)
        short_name = request.POST.get('short_name', False)
        fasta = request.POST.get('target_file', False)
        version = request.POST.get('version', False)
        notes = request.POST.get('notes', "")

        # optional
        read_exclude_length = request.POST.get('read_exclude_length', False)

        # URL download
        url = request.POST.get('url', False)

        # if any of the required fields is missing send back a failed message
        if not all((name, short_name, fasta, version)):
            return render_to_json({
                "status": "Form validation failed",
                "error": True
            })

        if not set(short_name).issubset(
                "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
        ):
            return render_to_json({
                "status":
                "The short name has invalid characters. The valid values are letters, numbers, and underscores.",
                "error": True
            })

        #TODO: check to make sure the zip file only has one fasta or fa
        if not url:
            # Built only after validation: concatenating a missing
            # ``target_file`` (False) would raise TypeError instead of
            # producing the validation message above.
            reference_path = REFERENCE_LIBRARY_TEMP_DIR + fasta

            # ``fasta`` comes from the client and the path may be deleted
            # below, so reject anything resolving outside the temp directory
            # (e.g. "../" sequences).
            # NOTE(review): assumes uploads live under
            # REFERENCE_LIBRARY_TEMP_DIR -- confirm against the upload handler.
            temp_root = os.path.join(
                os.path.abspath(REFERENCE_LIBRARY_TEMP_DIR), "")
            if not os.path.abspath(reference_path).startswith(temp_root):
                return render_to_json({
                    "status": "The FASTA file name is invalid.",
                    "error": True
                })

            # check to ensure the size on disk is the same as the reported one
            reported_file_size = request.POST.get('reported_file_size', False)

            try:
                uploaded_file_size = str(os.path.getsize(reference_path))
            except OSError:
                return render_to_json({
                    "status": "The FASTA temporary files was not found",
                    "error": True
                })

            why_delete = ""

            if reported_file_size != uploaded_file_size:
                why_delete = "The file you uploaded differs from the expected size. This is due to an error uploading."

            # A wrong extension takes precedence over a size mismatch.
            if not fasta.lower().endswith((".fasta", ".zip")):
                why_delete = "The file you uploaded does not have a .fasta or .zip extension.  It must be a plain text fasta file or a Zip compressed fasta."

            if why_delete:
                try:
                    os.remove(reference_path)
                except OSError:
                    why_delete += " The FASTA file could not be deleted."
                return render_to_json({"status": why_delete, "error": True})

        # Refuse to create a duplicate: same short name and same index version.
        if ReferenceGenome.objects.filter(short_name=short_name,
                                          index_version=settings.TMAP_VERSION):
            return render_to_json({
                "status":
                "Failed - Genome with this short name and index version already exist.",
                "error": True
            })

        # Make a genome ref object
        ref_genome = ReferenceGenome()
        ref_genome.name = name
        ref_genome.short_name = short_name
        ref_genome.version = version
        ref_genome.date = datetime.datetime.now()
        ref_genome.notes = notes
        ref_genome.status = "queued"
        ref_genome.enabled = False
        ref_genome.index_version = settings.TMAP_VERSION

        # before the object is saved, ping the xml-rpc server to see if it is alive
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            # just check uptime to make sure the call does not fail
            conn.uptime()
            logger.debug('Connected to ionJobserver process.')
        except (socket.error, xmlrpclib.Fault):
            return render_to_json({
                "status":
                "Unable to connect to ionJobserver process.  You may need to restart ionJobserver",
                "error": True
            })

        # if the above didn't fail then we can save the object;
        # it must be saved before the tmap call because its id is passed along
        ref_genome.save()
        logger.debug('Saved ReferenceGenome %s' % ref_genome.__dict__)

        # kick off the tmap xmlrpc call
        import traceback
        try:
            conn = client.connect(JOBSERVER_HOST, settings.JOBSERVER_PORT)
            tmap_bool, tmap_status = conn.tmap(str(ref_genome.id), fasta,
                                               short_name, name, version,
                                               read_exclude_length,
                                               settings.TMAP_VERSION)
            logger.debug('ionJobserver process reported %s %s' %
                         (tmap_bool, tmap_status))
        except (socket.error, xmlrpclib.Fault):
            # delete the genome object, because it was not successful
            ref_genome.delete()
            return render_to_json({
                "status": "Error with index creation",
                "error": traceback.format_exc()
            })

        if not tmap_bool:
            ref_genome.delete()
            return render_to_json({"status": tmap_status, "error": True})

        # Adjacent literals instead of a backslash continuation, which used to
        # embed the source indentation in the message shown to the user.
        return render_to_json({
            "status": ("The genome index is being created.  This might take a while, "
                       "check the status on the references tab. "
                       "You are being redirected there now."),
            "error": False
        })

    elif request.method == "GET":
        ctx = RequestContext(request, {})
        return render_to_response(
            "rundb/configure/modal_references_new_genome.html",
            context_instance=ctx)