Example #1
    def download_experiment_public_db(request, *args, **kwargs):
        init_database_connections()
        values = json.loads(request.POST['values'])

        comp_id = request.POST['compendium_id']
        channel_name = request.session['channel_name']
        view = request.POST['view']
        operation = request.POST['operation']
        compendium = CompendiumDatabase.objects.get(id=comp_id)
        # mark each selected experiment as scheduled, queue its download task
        # and broadcast a refresh so subscribed clients update
        scheduled_status = Status.objects.using(compendium.compendium_nick_name).get(
            name='experiment_scheduled')
        for exp_id in values:
            exp = ExperimentSearchResult.objects.using(compendium.compendium_nick_name).get(id=exp_id)
            exp.status = scheduled_status
            exp.save(using=compendium.compendium_nick_name)
            experiment_public.experiment_public_download.apply_async(
                (request.user.id, comp_id, exp_id, channel_name, view, operation)
            )
            Group("compendium_" + str(comp_id)).send({
                'text': json.dumps({
                    'stream': view,
                    'payload': {
                        'request': {'operation': 'refresh'},
                        'data': None
                    }
                })
            })

        return HttpResponse(json.dumps({'success': True}),
                            content_type="application/json")
Example #2
    def create_compendium(request, *args, **kwargs):
        values = json.loads(request.POST['values'])
        db = CompendiumDatabase()
        db.compendium_name = values['compendium_name']
        db.compendium_nick_name = values['compendium_nick_name']
        db.description = values['description']
        db.html_description = values['html_description']
        db.compendium_type = CompendiumType.objects.get(id=values['compendium_type'])
        db.db_engine = values['db_engine']
        db.db_user = values.get('db_user', None)
        db.db_password = values.get('db_password', None)
        db.db_port = values.get('db_port', None)
        db.db_host = values.get('db_host', None)
        if values['create_db']:
            # connect with the admin credentials to create the database itself,
            # then grant it to the regular application user
            admin_db = copy.copy(db)
            admin_db.compendium_nick_name = values['admin']['username']
            admin_db.db_user = values['admin']['username']
            admin_db.db_password = values['admin']['password']
            create_db(values['admin']['username'], admin_db.get_setting_entry()[1], db.compendium_nick_name, db.db_user)
        db.save()
        init_database_connections()
        Group('admin').send({
            'text': json.dumps({
                'stream': request.POST['view'],
                'payload': {
                    'request': {'operation': 'refresh'},
                    'data': None
                }
            })
        })
        channel_name = request.session['channel_name']
        channel = Channel(channel_name)
        Group("compendium_" + str(db.id)).add(channel)
        return HttpResponse(json.dumps({'success': True}),
                            content_type="application/json")
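
The admin connection above is derived by shallow-copying the unsaved model and overriding the credentials. Because every overridden attribute is a flat string, copy.copy suffices; a self-contained sketch of the same trick (DbConfig is a stand-in for the Django model):

import copy

class DbConfig:
    def __init__(self, nick_name, user, password):
        self.nick_name = nick_name
        self.user = user
        self.password = password

cfg = DbConfig('vespucci_human', 'app_user', 'app_pw')
admin_cfg = copy.copy(cfg)              # shallow copy: flat attributes detach cleanly
admin_cfg.user, admin_cfg.password = 'postgres', 'admin_pw'
print(cfg.user, admin_cfg.user)         # -> app_user postgres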
Example #3
def check_bio_features(request):
    comp = json.loads(request.POST['compendium'])
    init_database_connections()
    compendium = CompendiumDatabase.objects.get(id=comp['id'])
    bio_features = BioFeature.objects.using(compendium.compendium_nick_name).exists()
    return HttpResponse(
        json.dumps({
            'bio_features': bio_features
        }), content_type="application/json")
Example #4
def run_parsing_bio_feature(self, user_id, compendium_id, file_path,
                            bio_feature_name, file_type, channel_name, view,
                            operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id

    parser_cls = None
    for cls in importers.importer_mapping[bio_feature_name]:
        if cls.FILE_TYPE_NAME == file_type:
            parser_cls = cls
            break
    if parser_cls is None:
        raise ValueError('No parser registered for file type ' + file_type)

    parser = parser_cls(compendium.compendium_nick_name, bio_feature_name)
    parser.parse(file_path)
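
importer_mapping evidently maps a bio-feature name to its candidate parser classes, and the loop picks the one whose FILE_TYPE_NAME matches. A toy registry showing the same selection (class and key names are illustrative, not the project's importers):

class FastaParser:
    FILE_TYPE_NAME = 'fasta'

class GffParser:
    FILE_TYPE_NAME = 'gff'

importer_mapping = {'gene': [FastaParser, GffParser]}

def select_parser(bio_feature_name, file_type):
    for cls in importer_mapping[bio_feature_name]:
        if cls.FILE_TYPE_NAME == file_type:
            return cls
    raise ValueError('no parser registered for ' + file_type)

print(select_parser('gene', 'gff'))  # -> <class '__main__.GffParser'>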
Example #5
def run_alignment_filter(self, user_id, compendium_id, plt_dir, platform_id,
                         blast_file_name, alignment_length_1, gap_open_1,
                         mismatches_1, alignment_length_2, gap_open_2,
                         mismatches_2, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    mapper = MicroarrayMapper(os.path.join(plt_dir, blast_file_name))
    filter_params_id = mapper.write_params_db(float(alignment_length_1),
                                              float(gap_open_1),
                                              float(mismatches_1),
                                              float(alignment_length_2),
                                              float(gap_open_2),
                                              float(mismatches_2))
    mapper.set_filter_status(filter_params_id, 'running')
    # replace any previous task registered for this view/operation
    ViewTask.objects.using(compendium.compendium_nick_name).filter(
        view=view, operation=operation).delete()
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)

    Group("compendium_" + str(compendium_id)).send({
        'text':
        json.dumps({
            'stream': view,
            'payload': {
                'request': {
                    'operation': 'refresh'
                },
                'data': None
            }
        })
    })

    try:
        mapper.filter(filter_params_id, self.is_aborted)
        if self.is_aborted():
            raise DatabaseError('Operation aborted by user')
    except Exception as e:
        mapper.set_filter_status(filter_params_id, 'error')
        raise e

    return filter_params_id
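
mapper.filter receives self.is_aborted, so the long-running filter can poll it and bail out cooperatively; the task then converts the abort into a DatabaseError and flags the filter as failed. A sketch of that polling contract, with illustrative names:

class Aborted(RuntimeError):
    pass

def filter_rows(rows, is_aborted, check_every=1000):
    # poll the abort flag periodically instead of on every row,
    # mirroring how an abortable Celery task is expected to behave
    kept = []
    for i, row in enumerate(rows):
        if i % check_every == 0 and is_aborted():
            raise Aborted('Operation aborted by user')
        kept.append(row)
    return kept

print(len(filter_rows(range(5000), lambda: False)))  # -> 5000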
Example #6
def uncompress_file(self, user_id, compendium_id, exp_id, filename,
                    channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    exp = Experiment.objects.using(
        compendium.compendium_nick_name).get(id=exp_id)

    base_dir = AdminOptions.objects.get(option_name='download_directory')
    base_dir = os.path.join(base_dir.option_value,
                            compendium.compendium_nick_name,
                            exp.experiment_access_id)

    exp_file = os.path.join(base_dir, filename)

    file_system.uncompress_file(exp_file, base_dir)
Example #7
def experiment_public_search(self, user_id, compendium_id, term, db_id,
                             channel_name, view, operation):
    init_database_connections()
    channel = Channel(channel_name)
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    # replace any previous task registered for this view/operation
    ViewTask.objects.using(compendium.compendium_nick_name).filter(
        view=view, operation=operation).delete()
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)

    public_db_class_name = DataSource.objects.using(
        compendium.compendium_nick_name).get(id=db_id)
    Group("compendium_" + str(compendium_id)).send({
        'text':
        json.dumps({
            'stream': view,
            'payload': {
                'request': {
                    'operation': 'refresh'
                },
                'data': None
            }
        })
    })
    module_name, class_name = public_db_class_name.python_class.rsplit('.', 1)
    python_class = getattr(importlib.import_module(module_name), class_name)()
    results = python_class.search(term, user.email, db_id, self.is_aborted)
    experiment_status_download = Status.objects.using(
        compendium.compendium_nick_name).get(name='experiment_new')
    for result in results:
        result.status = experiment_status_download
    ExperimentSearchResult.objects.using(
        compendium.compendium_nick_name).filter(
            ~Q(status__name='experiment_scheduled')
            & ~Q(status__name='experiment_downloading')).delete()
    ExperimentSearchResult.objects.using(
        compendium.compendium_nick_name).bulk_create(results)
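
The search backend is chosen at runtime from DataSource.python_class, a dotted path split into module and class and resolved with importlib. The same technique, reduced to a reusable sketch (load_class is a hypothetical helper; the demo resolves a stdlib class):

import importlib

def load_class(dotted_path):
    # 'pkg.module.ClassName' -> the class object
    module_name, class_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_name), class_name)

OrderedDict = load_class('collections.OrderedDict')
print(OrderedDict([('a', 1)]))  # -> OrderedDict([('a', 1)])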
Example #8
    def get_experiment_files(request, *args, **kwargs):
        base_dir = AdminOptions.objects.get(option_name='download_directory').option_value
        compendium = CompendiumDatabase.objects.get(id=request.POST['compendium_id'])
        init_database_connections()
        experiment = Experiment.objects.using(compendium.compendium_nick_name).get(id=request.POST['values'])

        full_dir = os.path.join(base_dir, compendium.compendium_nick_name, experiment.experiment_access_id)

        files = glob.iglob(os.path.join(full_dir, '**', '*'), recursive=True)

        files = [{'name': os.path.basename(filename),
                  'path': filename.replace(base_dir, '', 1),  # path relative to the download root
                  'type': os.path.splitext(filename)[1],
                  'date': time.strftime('%Y-%m-%d %H:%M', time.gmtime(os.path.getmtime(filename))),
                  'size': int(os.path.getsize(filename) / 1000)}  # size in kB
                 for filename in files if os.path.isfile(filename)]

        return HttpResponse(json.dumps({'success': True, 'files': files, 'total': len(files)}),
                            content_type="application/json")
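
The listing logic is independent of Django and can be exercised on its own; a sketch with the same fields and units (sizes in kB, timestamps in UTC):

import glob
import os
import time

def list_files(full_dir, base_dir):
    paths = glob.iglob(os.path.join(full_dir, '**', '*'), recursive=True)
    return [{
        'name': os.path.basename(p),
        'path': p.replace(base_dir, '', 1),   # path relative to the root
        'type': os.path.splitext(p)[1],
        'date': time.strftime('%Y-%m-%d %H:%M', time.gmtime(os.path.getmtime(p))),
        'size': os.path.getsize(p) // 1000,   # kB, matching the view above
    } for p in paths if os.path.isfile(p)]

print(list_files(os.getcwd(), os.getcwd()))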
Example #9
def experiment_local_upload(self, user_id, compendium_id, exp_id, exp_name,
                            exp_descr, exp_structure_file, exp_data_file,
                            channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)

    base_dir = AdminOptions.objects.get(option_name='download_directory')
    base_dir = os.path.join(base_dir.option_value,
                            compendium.compendium_nick_name, str(exp_id))

    exp_file = os.path.join(base_dir, exp_data_file)

    local_data_source = LocalDataSource()
    local_data_source.uncompress_experiment_file(exp_file, base_dir)
    os.rename(
        os.path.join(base_dir, exp_structure_file),
        os.path.join(base_dir,
                     local_data_source.experiment_structure_filename))
    local_data_source.create_experiment_structure(compendium_id, exp_id,
                                                  base_dir)
Example #10
def run_parsing_script(self, user_id, compendium_id, exp_id, entity_type,
                       entity_name, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    exp = Experiment.objects.using(
        compendium.compendium_nick_name).get(id=exp_id)
    sample = None
    platform = None
    entity_object_name = None
    running_status = Status.objects.using(
        compendium.compendium_nick_name).get(name='entity_script_running')
    assigned_files = []
    base_path = os.path.dirname(parsing_scripts.__file__)
    entity = None
    if entity_type == 'experiment':
        entity = exp
        entity_object_name = exp.experiment_access_id
    elif entity_type == 'platform':
        platform = Platform.objects.using(
            compendium.compendium_nick_name).get(id=entity_name)
        entity = platform
        entity_object_name = platform.platform_access_id
    elif entity_type == 'sample':
        sample = Sample.objects.using(compendium.compendium_nick_name).get(
            experiment=exp, id=entity_name)
        entity = sample
        entity_object_name = sample.sample_name
    if entity is not None:
        # identical bookkeeping for all three entity types: mark each
        # assigned file as running and normalize its script path and order
        for af in entity.assignedfile_set.all():
            af.status = running_status
            af.save(using=compendium.compendium_nick_name)
            af_dict = af.to_dict()
            af_dict['script_name'] = os.path.join(base_path, entity_type,
                                                  af_dict['script_name'])
            af_dict['order'] = 0 if not af_dict['order'] else int(
                af_dict['order'])
            assigned_files.append(af_dict)
    assigned_files.sort(key=lambda x: int(x['order']))

    Group("compendium_" + str(compendium_id) + "_" + str(exp_id)).send({
        'text':
        json.dumps({
            'stream': entity_type + "_" + view,
            'payload': {
                'request': {
                    'operation': 'refresh'
                },
                'data': None
            }
        })
    })

    base_dir = AdminOptions.objects.get(option_name='raw_data_directory')
    out_dir = os.path.join(base_dir.option_value,
                           compendium.compendium_nick_name,
                           exp.experiment_access_id)
    key = os.path.join(out_dir, exp.experiment_access_id + '.sqlite')

    # capture everything the parsing scripts print; sys.stdout is put back
    # after the loop
    buffer = StringIO()
    sys.stdout = buffer
    context = {}
    soft_parser_dir = os.path.dirname(soft_file_parser.__file__)
    input_file_dir = AdminOptions.objects.get(
        option_name='download_directory').option_value
    input_file_dir = os.path.join(input_file_dir,
                                  compendium.compendium_nick_name,
                                  exp.experiment_access_id)
    experiment_proxy = ExperimentProxy(
        exp, key) if entity_type == 'experiment' else None
    platform_proxy = PlatformProxy(platform,
                                   key) if entity_type == 'platform' else None
    sample_proxy = SampleProxy(sample,
                               key) if entity_type == 'sample' else None
    for assigned_file in assigned_files:
        script = assigned_file['script_name']
        if os.path.isfile(script) and script.endswith('.py'):
            input_file_value = os.path.join(input_file_dir,
                                            assigned_file['input_filename'])
            context = {
                'PARAMETERS':
                [p.strip() for p in assigned_file['parameters'].split(',')],
                'INPUT_FILE':
                input_file_value,
                'ENTITY_NAME':
                entity_object_name,
                'EXPERIMENT_OBJECT':
                experiment_proxy,
                'PLATFORM_OBJECT':
                platform_proxy,
                'SAMPLE_OBJECT':
                sample_proxy,
                'COMPENDIUM':
                compendium.compendium_nick_name
            }
            script_dir = os.path.dirname(script)
            util_dir = os.path.join(os.path.dirname(script_dir), 'utils')
            sys.path.append(script_dir)
            sys.path.append(util_dir)
            sys.path.append(soft_parser_dir)
            with open(script) as script_file:
                exec(script_file.read(), context)

    sys.stdout = sys.__stdout__

    if 'EXPERIMENT_OBJECT' in context and isinstance(
            context['EXPERIMENT_OBJECT'], ExperimentProxy):
        context['EXPERIMENT_OBJECT'].save_experiment_object()
    if 'PLATFORM_OBJECT' in context and isinstance(context['PLATFORM_OBJECT'],
                                                   PlatformProxy):
        context['PLATFORM_OBJECT'].save_platform_object()
    if 'SAMPLE_OBJECT' in context and isinstance(context['SAMPLE_OBJECT'],
                                                 SampleProxy):
        context['SAMPLE_OBJECT'].save_sample_object()

    return buffer.getvalue()
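
The task hijacks sys.stdout for the whole parsing run and restores it only at the end, so an exception inside a script would leave stdout redirected. contextlib.redirect_stdout gives the same capture with guaranteed restoration; a runnable sketch of the exec-with-context technique:

import contextlib
import io

def run_script(source, context):
    # execute a user script with an injected global context and
    # capture everything it prints; stdout is restored even on error
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        exec(source, context)
    return buffer.getvalue()

ctx = {'ENTITY_NAME': 'GSE12345'}
print(run_script("print('parsing', ENTITY_NAME)", ctx))  # -> parsing GSE12345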
Example #11
def run_platform_mapper(self, user_id, compendium_id, plt_dir, platform_id,
                        use_short_blastn, alignment_identity, channel_name,
                        view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id

    blast_file_name = os.path.join(plt_dir, task_id + '.blast')
    open(blast_file_name, 'a').close()
    mapper = MicroarrayMapper(blast_file_name)
    mapper.create_db(alignment_identity, use_short_blastn)
    mapper.set_alignment_status('running')

    Group("compendium_" + str(compendium_id)).send({
        'text':
        json.dumps({
            'stream': view,
            'payload': {
                'request': {
                    'operation': 'refresh'
                },
                'data': None
            }
        })
    })

    report = ''

    # create gene fasta file (bio_feature)
    gene_file_name = os.path.join(plt_dir, task_id + '_gene.fasta')
    with open(gene_file_name, 'w') as f:
        gene_name = ''
        for gene in BioFeature.objects.using(
                compendium.compendium_nick_name).all():
            try:
                gene_name = gene.name
                sequence = gene.biofeaturevalues_set.filter(
                    bio_feature_field__name='sequence')[0].value
                f.write('>' + str(gene.id) + '\n' + str(sequence) + '\n')
            except Exception as e:
                report += 'You have no sequence for ' + gene_name + '<br>'

    # create probe fasta file
    probe_file_name = os.path.join(plt_dir, task_id + '_probe.fasta')
    with open(probe_file_name, 'w') as f:
        probe_name = ''
        for probe in BioFeatureReporter.objects.using(compendium.compendium_nick_name).\
                filter(platform_id=platform_id).all():
            try:
                probe_name = probe.name
                sequence = probe.biofeaturereportervalues_set.filter(
                    bio_feature_reporter_field__name='sequence')[0].value
                f.write('>' + str(probe.id) + '\n' + str(sequence) + '\n')
            except Exception as e:
                report += 'You have no sequence for ' + probe_name + '<br>'

    # create blast db
    cmd = [os.path.join(settings.BASE_DIR, 'command', 'external_programs',
                        'ncbi-blast', 'bin', 'makeblastdb'),
           '-dbtype', 'nucl', '-in', gene_file_name]
    # argument-list form avoids shell quoting issues with paths
    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    (out, err) = process.communicate()

    # do alignment
    cmd = os.path.join(settings.BASE_DIR, 'command', 'external_programs',
                       'ncbi-blast', 'bin')
    os.environ["PATH"] += os.pathsep + cmd
    outfmt = '\'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen\''
    blastn_kwargs = dict(query=probe_file_name,
                         db=gene_file_name,
                         perc_identity=alignment_identity / 100.0,
                         outfmt=outfmt,
                         out=blast_file_name)
    if use_short_blastn:
        # short sequences (e.g. microarray probes) need the blastn-short task
        blastn_kwargs['task'] = 'blastn-short'
    blastn_cline = NcbiblastnCommandline(**blastn_kwargs)
    (out, err) = blastn_cline()

    # create stats db
    mapper.create_db(alignment_identity, use_short_blastn)

    os.chmod(mapper.blast_filename, 0o666)
    os.chmod(mapper.sqlite_filename, 0o666)

    return report
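
The custom -outfmt 6 string fixes the column layout of the tabular file that MicroarrayMapper later reads back. A minimal line parser keyed on those columns (illustrative; the project's mapper does its own parsing):

# column names copied from the outfmt string above
BLAST_COLUMNS = ['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen',
                 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore',
                 'qlen', 'slen']

def parse_blast_line(line):
    return dict(zip(BLAST_COLUMNS, line.rstrip('\n').split('\t')))

row = parse_blast_line('p1\tg7\t98.5\t50\t1\t0\t1\t50\t3\t52\t1e-20\t95.3\t50\t60')
print(row['sseqid'], row['pident'])  # -> g7 98.5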
Example #12
def run_file_assignment_script(self, user_id, compendium_id, exp_id,
                               script_filename, parameters, input_files,
                               experiment_entity, platform_entity,
                               sample_entity, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    exp = Experiment.objects.using(
        compendium.compendium_nick_name).get(id=exp_id)

    base_path = os.path.join(os.path.dirname(parsing_scripts.__file__),
                             'file_assignment')
    script = os.path.join(base_path, script_filename)

    buffer = StringIO()
    message = ""
    if os.path.isfile(script) and script.endswith('.py'):
        context = {
            'PARAMETERS': parameters.split(','),
            'INPUT_FILES': input_files
        }
        sys.stdout = buffer
        script_dir = os.path.dirname(script)
        util_dir = os.path.join(os.path.dirname(script_dir), 'utils')
        sys.path.append(script_dir)
        sys.path.append(util_dir)
        with open(script) as script_file:
            exec(script_file.read(), context)
        assign_function = context['assign']
        ready_status = Status.objects.using(
            compendium.compendium_nick_name).get(name='entity_script_ready')
        message = "Assigned "
        with transaction.atomic(using=compendium.compendium_nick_name):
            files = assign_function(context['INPUT_FILES'], exp.to_dict(),
                                    'experiment', context['PARAMETERS'])
            file_counter = 0
            for file in files:
                if not experiment_entity[0]:
                    continue
                file_counter += 1
                assigned_file = AssignedFile()
                assigned_file.script_name = experiment_entity[0]
                assigned_file.order = experiment_entity[1]
                assigned_file.parameters = experiment_entity[2]
                assigned_file.input_filename = os.path.basename(file)
                assigned_file.entity_type = 'EXP'
                assigned_file.experiment = exp
                assigned_file.status = ready_status
                assigned_file.save(using=compendium.compendium_nick_name)
            message += str(file_counter) + " files to EXPERIMENT, "
            platforms = set()
            file_counter = 0
            for sample in Sample.objects.using(
                    compendium.compendium_nick_name).filter(experiment=exp):
                platforms.add(sample.platform)
                files = assign_function(context['INPUT_FILES'],
                                        sample.to_dict(), 'sample',
                                        context['PARAMETERS'])
                for file in files:
                    if not sample_entity[0]:
                        continue
                    file_counter += 1
                    assigned_file = AssignedFile()
                    assigned_file.script_name = sample_entity[0]
                    assigned_file.order = sample_entity[1]
                    assigned_file.parameters = sample_entity[2]
                    assigned_file.input_filename = os.path.basename(file)
                    assigned_file.entity_type = 'SMP'
                    assigned_file.sample = sample
                    assigned_file.status = ready_status
                    assigned_file.save(using=compendium.compendium_nick_name)
            message += str(file_counter) + " files to SAMPLES, "
            file_counter = 0
            for platform in platforms:
                files = assign_function(context['INPUT_FILES'],
                                        platform.to_dict(), 'platform',
                                        context['PARAMETERS'])
                for file in files:
                    if not platform_entity[0]:
                        continue
                    file_counter += 1
                    assigned_file = AssignedFile()
                    assigned_file.script_name = platform_entity[0]
                    assigned_file.order = platform_entity[1]
                    assigned_file.parameters = platform_entity[2]
                    assigned_file.input_filename = os.path.basename(file)
                    assigned_file.entity_type = 'PLT'
                    assigned_file.platform = platform
                    assigned_file.status = ready_status
                    assigned_file.save(using=compendium.compendium_nick_name)
            message += str(file_counter) + " files to PLATFORMS"
    sys.stdout = sys.__stdout__

    return message, buffer.getvalue()
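
The executed script must expose an assign(input_files, entity, entity_type, parameters) function; the task calls it once for the experiment, each sample, and each platform, and persists an AssignedFile per returned path. A toy script honoring that contract (the matching logic is invented for illustration):

# toy_assignment.py - a minimal script the task above could exec
def assign(input_files, entity, entity_type, parameters):
    # claim the files whose name mentions the entity's identifier
    key = (entity.get('experiment_access_id')
           or entity.get('sample_name')
           or entity.get('platform_access_id', ''))
    return [f for f in input_files if key and key in f]

files = assign(['GSE1_raw.tar', 'GSM9_a.CEL'], {'sample_name': 'GSM9'},
               'sample', [])
print(files)  # -> ['GSM9_a.CEL']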
Example #13
    def init_compendium(request, *args, **kwargs):
        # 'init_compendium' below refers to the imported module that shares
        # this view's name, not to the view itself
        init_compendium.init_compendium(request.POST['values'])
        init_database_connections()
        msg = Message(type='info', title='Compendium initialized', message='Compendium successfully initialized')
        return HttpResponse(json.dumps(msg.to_dict()),
                            content_type="application/json")
Example #14
    def read_experiments_channel(channel_name, view, request, user):
        # 'request' here is the decoded channel message payload (a dict),
        # not a Django HttpRequest
        channel = Channel(channel_name)

        start = 0
        end = None
        compendium = CompendiumDatabase.objects.get(
            id=request['compendium_id'])
        operation = 'search_experiment_public_db'
        if request['page_size']:
            start = (request['page'] - 1) * request['page_size']
            end = start + request['page_size']

        order = ''
        if request['ordering'] == 'DESC':
            order = '-'
        exp_ids = set()
        # experiment
        init_database_connections()
        query_response = Experiment.objects.using(compendium.compendium_nick_name). \
            filter(Q(organism__icontains=request['filter']) |
                   Q(experiment_access_id__icontains=request['filter']) |
                   Q(scientific_paper_ref__icontains=request['filter']) |
                   Q(description__icontains=request['filter']) |
                   Q(experiment_name__icontains=request['filter'])).values_list('id', flat=True)
        exp_ids.update(query_response)
        # sample number
        query_response = Sample.objects.using(compendium.compendium_nick_name). \
            all().values('experiment').annotate(total=Count('id'))
        exp_ids.update([
            resp['experiment'] for resp in query_response
            if request['filter'] in str(resp['total'])
        ])
        # platform access id
        platform_ids = Platform.objects.using(compendium.compendium_nick_name). \
            filter(Q(platform_access_id__icontains=request['filter'])).values_list('id', flat=True)
        query_response = Sample.objects.using(compendium.compendium_nick_name). \
            filter(platform_id__in=platform_ids).values_list('experiment_id', flat=True)
        exp_ids.update(query_response)

        ordered = False
        query_response = Experiment.objects.using(compendium.compendium_nick_name). \
            filter(id__in=exp_ids)
        try:
            # probe the first row to verify the ordering field is valid at DB level
            query_response.order_by(order + request['ordering_value'])[0]
            query_response = query_response.order_by(order +
                                                     request['ordering_value'])
            ordered = True
        except Exception:
            pass
        total = query_response.count()
        query_response = query_response[start:end]

        experiments = []
        for exp in query_response:
            e = exp.to_dict()
            e['status_description'] = exp.status.description
            if exp.status.name == 'experiment_excluded':
                e['description'] = 'EXCLUDED: ' + exp.comments + '. ' + e[
                    'description']
            module_name, class_name = exp.data_source.python_class.rsplit('.', 1)
            python_class = getattr(importlib.import_module(module_name),
                                   class_name)()
            e['experiment_accession_base_link'] = python_class.experiment_accession_base_link
            e['platform_accession_base_link'] = python_class.platform_accession_base_link
            e['scientific_paper_accession_base_link'] = python_class.scientific_paper_accession_base_link
            e['platforms'] = [
                plt.to_dict() for plt in Platform.objects.
                using(compendium.compendium_nick_name).distinct().filter(
                    pk__in=set(
                        exp.sample_set.values_list('platform_id', flat=True)))
            ]
            e['n_samples'] = exp.sample_set.count()
            experiments.append(e)

        reverse = order == '-'

        if not ordered:
            if request['ordering_value'] == 'platforms':
                experiments.sort(reverse=reverse,
                                 key=lambda x: ','.join([
                                     p['platform_access_id']
                                     for p in x[request['ordering_value']]
                                 ]))
            else:
                experiments.sort(reverse=reverse,
                                 key=lambda x: x[request['ordering_value']])

        channel.send({
            'text':
            json.dumps({
                'stream': view,
                'payload': {
                    'request': request,
                    'data': {
                        'experiments': experiments,
                        'total': total
                    }
                }
            })
        })
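
When the requested ordering column is not a database field, the view falls back to sorting the assembled dictionaries in Python, joining the platform access ids into one sortable string for the 'platforms' pseudo-column. The fallback in isolation:

def sort_experiments(experiments, ordering_value, reverse=False):
    if ordering_value == 'platforms':
        key = lambda e: ','.join(p['platform_access_id'] for p in e['platforms'])
    else:
        key = lambda e: e[ordering_value]
    return sorted(experiments, key=key, reverse=reverse)

exps = [{'n_samples': 3, 'platforms': [{'platform_access_id': 'GPL2'}]},
        {'n_samples': 1, 'platforms': [{'platform_access_id': 'GPL1'}]}]
print(sort_experiments(exps, 'platforms')[0]['platforms'][0]['platform_access_id'])
# -> GPL1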
Example #15
def experiment_public_download(self, user_id, compendium_id, experiment_id,
                               channel_name, view, operation):
    init_database_connections()
    channel = Channel(channel_name)
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(experiment_id)
    downloading_status = Status.objects.using(
        compendium.compendium_nick_name).get(name='experiment_downloading')
    exp = ExperimentSearchResult.objects.using(
        compendium.compendium_nick_name).get(id=experiment_id)
    exp.status = downloading_status
    exp.save(using=compendium.compendium_nick_name)
    # replace any previous task registered for this view/operation
    ViewTask.objects.using(compendium.compendium_nick_name).filter(
        operation=operation, view=view).delete()
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)
    data_ready_status = Status.objects.using(
        compendium.compendium_nick_name).get(name='experiment_data_ready')
    base_output_directory = AdminOptions.objects.get(
        option_name='download_directory')
    exp = ExperimentSearchResult.objects.using(
        compendium.compendium_nick_name).get(id=experiment_id)
    out_dir = os.path.join(base_output_directory.option_value,
                           compendium.compendium_nick_name,
                           exp.experiment_access_id)
    os.makedirs(out_dir, exist_ok=True)
    Group("compendium_" + str(compendium_id)).send({
        'text':
        json.dumps({
            'stream': view,
            'payload': {
                'request': {
                    'operation': 'refresh'
                },
                'data': None
            }
        })
    })
    log_message = ''
    module_name, class_name = exp.data_source.python_class.rsplit('.', 1)
    python_class = getattr(importlib.import_module(module_name), class_name)()
    python_class.download_experiment_files(exp.experiment_access_id,
                                           user.email, out_dir)
    if Experiment.objects.using(compendium.compendium_nick_name).filter(
            experiment_access_id=exp.experiment_access_id).exists():
        message = Message(
            type='info',
            title='Experiment already exists',
            message='The experiment ' + exp.experiment_access_id +
            ' is already present in the database. The data have been downloaded anyway.'
        )
        message.send_to(channel)
    else:
        log_message = python_class.create_experiment_structure(
            compendium_id, experiment_id, out_dir)
    exp.status = data_ready_status
    exp.save(using=compendium.compendium_nick_name)

    return log_message
Example #16
def export_raw_data(self, user_id, compendium_id, path, channel_name, view,
                    operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id

    os.makedirs(path, exist_ok=True)
    millis = int(round(time.time() * 1000))
    base_dir = AdminOptions.objects.get(
        option_name='raw_data_directory').option_value
    file_path_hdf5 = 'export_data_' + str(task_id) + '_' + str(
        millis) + '.hdf5'
    file_path_tsv = 'export_data_' + str(task_id) + '_' + str(millis) + '.tsv'
    file_path_gz = 'export_data_' + str(task_id) + '_' + str(
        millis) + '.tsv.gz'
    full_path_hdf5 = os.path.join(path, file_path_hdf5)
    full_path_tsv = os.path.join(path, file_path_tsv)
    full_path_gz = os.path.join(path, file_path_gz)
    try:
        for fl in glob.glob(path + '/*.tsv'):
            os.remove(fl)
        for fl in glob.glob(path + '/*.hdf5'):
            os.remove(fl)
        for fl in glob.glob(path + '/*.gz'):
            os.remove(fl)
    except Exception as e:
        pass
    store = pd.HDFStore(full_path_hdf5)
    header = Sample.objects.using(compendium.compendium_nick_name).\
        order_by('platform', 'experiment').values('id', 'sample_name')
    bio_features = BioFeature.objects.using(compendium.compendium_nick_name). \
        order_by('name')
    bio_feature_name = compendium.compendium_type.bio_feature_name
    reporter_name = 'reporter ({})'.format(','.join([
        plt.platform_type.description for plt in Platform.objects.using(
            compendium.compendium_nick_name).all() if plt.platform_type
    ]))
    columns = [bio_feature_name, reporter_name] + [
        'Platform', 'Platform type'
    ] + [s['sample_name'] for s in header]
    max_sample_name = max([len(s['sample_name']) for s in header])
    min_size = {s['sample_name']: max_sample_name for s in header}
    min_size['Platform'] = max([
        len(plt.platform_access_id)
        for plt in Platform.objects.using(compendium.compendium_nick_name)
    ])
    min_size['Platform type'] = max([
        len(plt.description)
        for plt in PlatformType.objects.using(compendium.compendium_nick_name)
    ])
    df = pd.DataFrame(columns=columns)
    store.put('raw_data',
              df,
              format='table',
              data_columns=True,
              min_itemsize=min_size)
    line_number = 50000
    # size each queryset batch so one HDF5 append holds roughly
    # 'line_number' reporter rows
    batch_size = int((line_number * bio_features.count()) /
                     BioFeatureReporter.objects.using(
                         compendium.compendium_nick_name).count())
    for start, end, total, qs in batch_qs(bio_features, batch_size=batch_size):
        bfr = {(bf.id, bf.name): list(
            bf.biofeaturereporter_set.order_by('platform').values_list(
                'id', 'name', 'platform__platform_access_id',
                'platform__platform_type__description'))
               for bf in qs}
        bf_name_len = 15
        rep_name_len = 15
        data = [
            [],  # bio_features
            [],  # reporters
            [],  # platforms
            []  # platform types
        ]
        for k, v in bfr.items():
            for r in v:
                data[0].append(k[1])  # bio_features
                data[1].append(r[0])  # reporters
                data[2].append(r[2])  # platforms
                data[3].append(r[3])  # platform types
                bf_name_len = max(bf_name_len, len(k[1]))
                rep_name_len = max(rep_name_len, len(r[1]))
        min_size[bio_feature_name] = bf_name_len
        min_size[reporter_name] = rep_name_len
        for sample in header:
            rd = {
                rdv['bio_feature_reporter_id']: rdv['value']
                for rdv in RawData.objects.using(
                    compendium.compendium_nick_name).filter(
                        sample__id=sample['id'],
                        bio_feature_reporter_id__in=data[1]).values(
                            'bio_feature_reporter_id', 'value')
            }
            data.append([rd.get(r, np.nan) for r in data[1]])
        reporters_map = dict([y[:2] for x in bfr.values() for y in x])
        data[1] = [reporters_map[i]
                   for i in data[1]]  # use name instead of id for reporters
        store.append('raw_data',
                     pd.DataFrame(np.array(data).T, columns=columns),
                     format='table',
                     data_columns=True,
                     min_itemsize=min_size)
    store.close()
    header_flag = True
    with open(full_path_tsv, 'a') as f:
        for df in pd.read_hdf(full_path_hdf5, chunksize=line_number):
            df.to_csv(f, header=header_flag, sep='\t', index=False)
            header_flag = False

    compress_gz(full_path_tsv, full_path_gz)

    return full_path_gz.replace(base_dir, '')
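
batch_qs is imported from elsewhere in the project; judging by how the loop unpacks it, it slices an ordered queryset into (start, end, total, batch) tuples. A plausible reconstruction (an assumption, since the helper's source is not shown here):

def batch_qs(qs, batch_size=1000):
    # slice a queryset lazily so huge tables are never loaded at once
    total = qs.count()
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        yield start, end, total, qs[start:end]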
Example #17
def import_platform_mapping(self, user_id, compendium_id, plt_dir, platform_id,
                            filter_id, blast_file_name, channel_name, view,
                            operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    # replace any previous task registered for this view/operation
    ViewTask.objects.using(compendium.compendium_nick_name).filter(
        view=view, operation=operation).delete()
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)

    mapper = MicroarrayMapper(os.path.join(plt_dir, blast_file_name))
    mapper.set_filter_status(filter_id, 'running')
    Group("compendium_" + str(compendium_id)).send({
        'text':
        json.dumps({
            'stream': view,
            'payload': {
                'request': {
                    'operation': 'refresh'
                },
                'data': None
            }
        })
    })
    chunk_size = 30000
    start = 0
    total = mapper.get_filter_result_count()
    bio_feature_reporter_mapping_csv = os.path.join(
        plt_dir, blast_file_name) + '_bio_feature_reporter_mapping.csv'
    try:
        os.remove(bio_feature_reporter_mapping_csv)
    except Exception as e:
        pass
    with transaction.atomic(using=compendium.compendium_nick_name):
        # snapshot the current mapping so it can be reported (and restored)
        # if the import fails or is aborted
        without_mapping_before = set(
            BioFeatureReporter.objects.using(
                compendium.compendium_nick_name).filter(
                    platform_id=platform_id,
                    bio_feature__isnull=True).values_list(
                        'id', flat=True).distinct())
        with_mapping_before = dict(
            BioFeatureReporter.objects.using(
                compendium.compendium_nick_name).filter(
                    platform_id=platform_id,
                    bio_feature__isnull=False).values_list(
                        'id', 'bio_feature_id'))
        # clear all mappings for the platform, then re-import in chunks
        BioFeatureReporter.objects.using(compendium.compendium_nick_name).\
            filter(platform_id=platform_id).update(bio_feature=None)
        for chunk_num in range(int(math.ceil(total / chunk_size))):
            chunk = mapper.get_filter_result_dict(filter_id, start,
                                                  start + chunk_size,
                                                  self.is_aborted)
            if self.is_aborted():
                raise DatabaseError('Operation aborted by user')
            for rep_id, feat_id in chunk.items():
                BioFeatureReporter.objects.using(compendium.compendium_nick_name).\
                    filter(id=rep_id).update(bio_feature_id=feat_id)
            start += chunk_size

    return bio_feature_reporter_mapping_csv, list(
        without_mapping_before), with_mapping_before
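
The chunked import walks the filter results with ceil-division arithmetic; isolated, the chunking looks like this:

import math

def chunk_ranges(total, chunk_size):
    for n in range(math.ceil(total / chunk_size)):
        start = n * chunk_size
        yield start, min(start + chunk_size, total)

print(list(chunk_ranges(70000, 30000)))
# -> [(0, 30000), (30000, 60000), (60000, 70000)]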
Example #18
def init_parsing(db_id, exp_id, get_name_only=False):
    init_database_connections()
    compendium = CompendiumDatabase.objects.get(id=db_id)
    experiment = Experiment.objects.using(compendium.compendium_nick_name).get(id=exp_id)
    base_dir = AdminOptions.objects.get(option_name='raw_data_directory')
    out_dir = os.path.join(base_dir.option_value, compendium.compendium_nick_name,
                           experiment.experiment_access_id)
    os.makedirs(out_dir, exist_ok=True)
    key = os.path.join(out_dir, experiment.experiment_access_id + '.sqlite')
    if get_name_only:
        return key
    value = {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': key,
        'PARSING': True
    }
    connections.databases[key] = value
    try:
        exp = ParsingExperiment.objects.using(key).all()[0]
    except Exception:
        # no row yet (or the sqlite file is not migrated yet): create the
        # schema and seed the experiment from the main compendium database
        call_command('migrate', database=key)
        module_name, class_name = experiment.data_source.python_class.rsplit('.', 1)
        python_class = getattr(importlib.import_module(module_name), class_name)()
        exp = ParsingExperiment()
        exp.organism = experiment.organism
        exp.experiment_access_id = experiment.experiment_access_id
        exp.description = experiment.description
        exp.experiment_name = experiment.experiment_name
        if experiment.scientific_paper_ref:
            exp.scientific_paper_ref = python_class.scientific_paper_accession_base_link + experiment.scientific_paper_ref
        exp.experiment_fk = experiment.id
        exp.save(using=key)
    try:
        ParsingPlatform.objects.using(key).all()[0]
    except Exception as e:
        platform_ids = list(set([sample.platform_id for sample in experiment.sample_set.all() if sample.platform_id]))
        for platform_id in platform_ids:
            platform = Platform.objects.using(compendium.compendium_nick_name).get(id=platform_id)
            plt = ParsingPlatform()
            plt.platform_access_id = platform.platform_access_id
            plt.platform_name = platform.platform_name
            plt.description = platform.description
            plt.platform_type = platform.platform_type.name if platform.platform_type else None
            plt.platform_fk = platform_id
            plt.reporter_platform = platform_id
            plt.reporter_platform_imported = platform.biofeaturereporter_set.count() > 0
            plt.save(using=key)
    try:
        ParsingSample.objects.using(key).all()[0]
    except Exception as e:
        for sample in experiment.sample_set.all():
            plt = ParsingPlatform.objects.using(key).get(platform_fk=sample.platform_id)
            smp = ParsingSample()
            smp.sample_name = sample.sample_name
            smp.description = sample.description
            smp.experiment = exp
            smp.platform = plt
            smp.sample_fk = sample.id
            smp.reporter_platform = plt.reporter_platform
            smp.reporter_platform_imported = plt.reporter_platform_imported
            smp.save(using=key)

    return key
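
init_parsing seeds the per-experiment sqlite database idempotently: each block probes for an existing row and only creates data the first time the database is touched (the broad except also covers the not-yet-migrated case). The pattern, abstracted away from Django (callables stand in for the queryset probe and the save):

def seed_once(fetch_first, create):
    # probe for an existing row; on the first run the probe raises
    # IndexError and the seed routine is executed instead
    try:
        return fetch_first()
    except IndexError:
        return create()

rows = []
def create_row():
    rows.append('ParsingExperiment #1')
    return rows[0]

print(seed_once(lambda: rows[0], create_row))  # -> ParsingExperiment #1
print(seed_once(lambda: rows[0], create_row))  # second call reuses the row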