def download_experiment_public_db(request, *args, **kwargs):
    init_database_connections()
    values = json.loads(request.POST['values'])
    comp_id = request.POST['compendium_id']
    channel_name = request.session['channel_name']
    view = request.POST['view']
    operation = request.POST['operation']
    compendium = CompendiumDatabase.objects.get(id=comp_id)
    scheduled_status = Status.objects.using(compendium.compendium_nick_name).get(
        name='experiment_scheduled')
    for exp_id in values:
        # mark the search result as scheduled, then queue the download task
        exp = ExperimentSearchResult.objects.using(compendium.compendium_nick_name).get(id=exp_id)
        exp.status = scheduled_status
        exp.save(using=compendium.compendium_nick_name)
        experiment_public.experiment_public_download.apply_async(
            (request.user.id, comp_id, exp_id, channel_name, view, operation)
        )
    Group("compendium_" + str(comp_id)).send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    return HttpResponse(json.dumps({'success': True}), content_type="application/json")
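# The refresh broadcast above reappears in almost every task in this module.
# A hedged consolidation sketch; 'send_refresh' is a hypothetical helper name,
# not an existing function in this codebase:
def send_refresh(group_name, stream):
    """Ask every client in a channel group to reload the given view (sketch)."""
    Group(group_name).send({
        'text': json.dumps({
            'stream': stream,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
# Example (hypothetical): send_refresh("compendium_" + str(comp_id), view)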
def create_compendium(request, *args, **kwargs):
    values = json.loads(request.POST['values'])
    db = CompendiumDatabase()
    db.compendium_name = values['compendium_name']
    db.compendium_nick_name = values['compendium_nick_name']
    db.description = values['description']
    db.html_description = values['html_description']
    db.compendium_type = CompendiumType.objects.get(id=values['compendium_type'])
    db.db_engine = values['db_engine']
    db.db_user = values.get('db_user', None)
    db.db_password = values.get('db_password', None)
    db.db_port = values.get('db_port', None)
    db.db_host = values.get('db_host', None)
    if values['create_db']:
        # create the physical database using the admin credentials
        admin_db = copy.copy(db)
        admin_db.compendium_nick_name = values['admin']['username']
        admin_db.db_user = values['admin']['username']
        admin_db.db_password = values['admin']['password']
        create_db(values['admin']['username'], admin_db.get_setting_entry()[1],
                  db.compendium_nick_name, db.db_user)
    db.save()
    init_database_connections()
    Group('admin').send({
        'text': json.dumps({
            'stream': request.POST['view'],
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    channel_name = request.session['channel_name']
    channel = Channel(channel_name)
    Group("compendium_" + str(db.id)).add(channel)
    return HttpResponse(json.dumps({'success': True}), content_type="application/json")
def check_bio_features(request):
    comp = json.loads(request.POST['compendium'])
    init_database_connections()
    compendium = CompendiumDatabase.objects.get(id=comp['id'])
    bio_features = BioFeature.objects.using(compendium.compendium_nick_name).count() > 0
    return HttpResponse(
        json.dumps({'bio_features': bio_features}),
        content_type="application/json")
def run_parsing_bio_feature(self, user_id, compendium_id, file_path, bio_feature_name,
                            file_type, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    parser_cls = None
    for cls in importers.importer_mapping[bio_feature_name]:
        if cls.FILE_TYPE_NAME == file_type:
            parser_cls = cls
            break
    if parser_cls is None:
        # fail with a clear message instead of "'NoneType' object is not callable"
        raise ValueError('No importer registered for file type ' + str(file_type))
    parser = parser_cls(compendium.compendium_nick_name, bio_feature_name)
    parser.parse(file_path)
def run_alignment_filter(self, user_id, compendium_id, plt_dir, platform_id, blast_file_name,
                         alignment_length_1, gap_open_1, mismatches_1,
                         alignment_length_2, gap_open_2, mismatches_2,
                         channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    mapper = MicroarrayMapper(os.path.join(plt_dir, blast_file_name))
    filter_params_id = mapper.write_params_db(
        float(alignment_length_1), float(gap_open_1), float(mismatches_1),
        float(alignment_length_2), float(gap_open_2), float(mismatches_2))
    mapper.set_filter_status(filter_params_id, 'running')
    try:
        ViewTask.objects.using(compendium.compendium_nick_name). \
            get(view=view, operation=operation).delete()
    except Exception as e:
        pass
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)
    Group("compendium_" + str(compendium_id)).send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    try:
        mapper.filter(filter_params_id, self.is_aborted)
        if self.is_aborted():
            raise DatabaseError('Operation aborted by user')
    except Exception as e:
        mapper.set_filter_status(filter_params_id, 'error')
        raise e
    return filter_params_id
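# Deleting any stale ViewTask row and saving a fresh one is repeated verbatim
# in several tasks in this file. A hedged consolidation sketch;
# 'register_view_task' is a hypothetical helper, not part of the original code:
def register_view_task(db_alias, task_id, view, operation):
    """Replace the ViewTask row tracking this (view, operation) pair (sketch)."""
    try:
        ViewTask.objects.using(db_alias).get(view=view, operation=operation).delete()
    except ViewTask.DoesNotExist:
        pass  # nothing to replace
    ViewTask(task_id=task_id, operation=operation, view=view).save(using=db_alias)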
def uncompress_file(self, user_id, compendium_id, exp_id, filename, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    exp = Experiment.objects.using(compendium.compendium_nick_name).get(id=exp_id)
    base_dir = AdminOptions.objects.get(option_name='download_directory')
    base_dir = os.path.join(base_dir.option_value, compendium.compendium_nick_name,
                            exp.experiment_access_id)
    exp_file = os.path.join(base_dir, filename)
    file_system.uncompress_file(exp_file, base_dir)
def experiment_public_search(self, user_id, compendium_id, term, db_id, channel_name, view, operation):
    init_database_connections()
    channel = Channel(channel_name)
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    try:
        ViewTask.objects.using(compendium.compendium_nick_name). \
            get(view=view, operation=operation).delete()
    except Exception as e:
        pass
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)
    public_db_class_name = DataSource.objects.using(
        compendium.compendium_nick_name).get(id=db_id)
    Group("compendium_" + str(compendium_id)).send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    module_name, class_name = '.'.join(public_db_class_name.python_class.split('.')[:-1]), \
        public_db_class_name.python_class.split('.')[-1]
    python_class = getattr(importlib.import_module(module_name), class_name)()
    results = python_class.search(term, user.email, db_id, self.is_aborted)
    experiment_status_download = Status.objects.using(
        compendium.compendium_nick_name).get(name='experiment_new')
    for result in results:
        result.status = experiment_status_download
    # drop stale results, but keep experiments that are scheduled or downloading
    ExperimentSearchResult.objects.using(compendium.compendium_nick_name).filter(
        ~Q(status__name='experiment_scheduled') &
        ~Q(status__name='experiment_downloading')).delete()
    ExperimentSearchResult.objects.using(compendium.compendium_nick_name).bulk_create(results)
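# The dotted-path loading above (split python_class, import the module, getattr
# the class, instantiate) recurs in several functions below. A hedged
# consolidation sketch; 'load_python_class' is a hypothetical helper:
def load_python_class(dotted_path):
    """Instantiate a class from a 'package.module.ClassName' string (sketch)."""
    module_name, class_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_name), class_name)()
# Example (hypothetical): python_class = load_python_class(public_db_class_name.python_class)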
def get_experiment_files(request, *args, **kwargs):
    base_dir = AdminOptions.objects.get(option_name='download_directory').option_value
    compendium = CompendiumDatabase.objects.get(id=request.POST['compendium_id'])
    init_database_connections()
    experiment = Experiment.objects.using(compendium.compendium_nick_name).get(
        id=request.POST['values'])
    full_dir = os.path.join(base_dir, compendium.compendium_nick_name,
                            experiment.experiment_access_id)
    files = glob.iglob(os.path.join(full_dir, '**', '*'), recursive=True)
    files = [{'name': os.path.basename(filename),
              'path': filename.replace(base_dir, ''),
              'type': os.path.splitext(filename)[1],
              'date': time.strftime('%Y-%m-%d %H:%M', time.gmtime(os.path.getmtime(filename))),
              'size': int(os.path.getsize(filename) / 1000)}  # size in KB
             for filename in files if os.path.isfile(filename)]
    return HttpResponse(json.dumps({'success': True, 'files': files, 'total': len(files)}),
                        content_type="application/json")
def experiment_local_upload(self, user_id, compendium_id, exp_id, exp_name, exp_descr,
                            exp_structure_file, exp_data_file, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    base_dir = AdminOptions.objects.get(option_name='download_directory')
    # exp_id may arrive as an int: cast it so os.path.join does not raise
    base_dir = os.path.join(base_dir.option_value, compendium.compendium_nick_name, str(exp_id))
    exp_file = os.path.join(base_dir, exp_data_file)
    local_data_source = LocalDataSource()
    local_data_source.uncompress_experiment_file(exp_file, base_dir)
    os.rename(
        os.path.join(base_dir, exp_structure_file),
        os.path.join(base_dir, local_data_source.experiment_structure_filename))
    local_data_source.create_experiment_structure(compendium_id, exp_id, base_dir)
def run_parsing_script(self, user_id, compendium_id, exp_id, entity_type, entity_name,
                       channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    exp = Experiment.objects.using(compendium.compendium_nick_name).get(id=exp_id)
    sample = None
    platform = None
    entity_object_name = None
    running_status = Status.objects.using(
        compendium.compendium_nick_name).get(name='entity_script_running')
    assigned_files = []
    base_path = os.path.dirname(parsing_scripts.__file__)

    def collect_assigned_files(entity):
        # mark each assigned file as running and normalize its script path/order
        for af in entity.assignedfile_set.all():
            af.status = running_status
            af.save(using=compendium.compendium_nick_name)
            af_dict = af.to_dict()
            af_dict['script_name'] = os.path.join(base_path, entity_type, af_dict['script_name'])
            af_dict['order'] = 0 if not af_dict['order'] else int(af_dict['order'])
            assigned_files.append(af_dict)

    if entity_type == 'experiment':
        entity_object_name = exp.experiment_access_id
        collect_assigned_files(exp)
    elif entity_type == 'platform':
        platform = Platform.objects.using(compendium.compendium_nick_name).get(id=entity_name)
        entity_object_name = platform.platform_access_id
        collect_assigned_files(platform)
    elif entity_type == 'sample':
        sample = Sample.objects.using(compendium.compendium_nick_name).get(
            experiment=exp, id=entity_name)
        entity_object_name = sample.sample_name
        collect_assigned_files(sample)
    assigned_files.sort(key=lambda x: int(x['order']))
    Group("compendium_" + str(compendium_id) + "_" + str(exp_id)).send({
        'text': json.dumps({
            'stream': entity_type + "_" + view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    base_dir = AdminOptions.objects.get(option_name='raw_data_directory')
    out_dir = os.path.join(base_dir.option_value, compendium.compendium_nick_name,
                           exp.experiment_access_id)
    key = os.path.join(out_dir, exp.experiment_access_id + '.sqlite')
    buffer = StringIO()
    sys.stdout = buffer  # capture script output; restored below
    context = {}
    soft_parser_dir = os.path.dirname(soft_file_parser.__file__)
    input_file_dir = AdminOptions.objects.get(option_name='download_directory').option_value
    input_file_dir = os.path.join(input_file_dir, compendium.compendium_nick_name,
                                  exp.experiment_access_id)
    experiment_proxy = ExperimentProxy(exp, key) if entity_type == 'experiment' else None
    platform_proxy = PlatformProxy(platform, key) if entity_type == 'platform' else None
    sample_proxy = SampleProxy(sample, key) if entity_type == 'sample' else None
    for assigned_file in assigned_files:
        script = assigned_file['script_name']
        if os.path.isfile(script) and script.endswith('.py'):
            input_file_value = os.path.join(input_file_dir, assigned_file['input_filename'])
            context = {
                'PARAMETERS': [p.strip() for p in assigned_file['parameters'].split(',')],
                'INPUT_FILE': input_file_value,
                'ENTITY_NAME': entity_object_name,
                'EXPERIMENT_OBJECT': experiment_proxy,
                'PLATFORM_OBJECT': platform_proxy,
                'SAMPLE_OBJECT': sample_proxy,
                'COMPENDIUM': compendium.compendium_nick_name
            }
            script_dir = os.path.dirname(script)
            util_dir = os.path.join(os.path.dirname(script_dir), 'utils')
            sys.path.append(script_dir)
            sys.path.append(util_dir)
            sys.path.append(soft_parser_dir)
            exec(open(script).read(), context)
    sys.stdout = sys.__stdout__
    if 'EXPERIMENT_OBJECT' in context and isinstance(context['EXPERIMENT_OBJECT'], ExperimentProxy):
        context['EXPERIMENT_OBJECT'].save_experiment_object()
    if 'PLATFORM_OBJECT' in context and isinstance(context['PLATFORM_OBJECT'], PlatformProxy):
        context['PLATFORM_OBJECT'].save_platform_object()
    if 'SAMPLE_OBJECT' in context and isinstance(context['SAMPLE_OBJECT'], SampleProxy):
        context['SAMPLE_OBJECT'].save_sample_object()
    return buffer.getvalue()
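# A script run through run_parsing_script receives the context keys above as
# module-level globals (they are injected through exec). A minimal hypothetical
# example script, for illustration only:
#
#     # example_parse.py (hypothetical file)
#     # PARAMETERS, INPUT_FILE, ENTITY_NAME, COMPENDIUM, EXPERIMENT_OBJECT,
#     # PLATFORM_OBJECT and SAMPLE_OBJECT are provided by the caller.
#     with open(INPUT_FILE) as fh:
#         for line in fh:
#             pass  # populate the relevant *_OBJECT proxy here
#     print('parsed', INPUT_FILE, 'for', ENTITY_NAME)  # stdout goes to 'buffer'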
def run_platform_mapper(self, user_id, compendium_id, plt_dir, platform_id,
                        use_short_blastn, alignment_identity, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    blast_file_name = os.path.join(plt_dir, task_id + '.blast')
    open(blast_file_name, 'a').close()
    mapper = MicroarrayMapper(blast_file_name)
    mapper.create_db(alignment_identity, use_short_blastn)
    mapper.set_alignment_status('running')
    Group("compendium_" + str(compendium_id)).send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    report = ''
    # create gene fasta file (bio_feature)
    gene_file_name = os.path.join(plt_dir, task_id + '_gene.fasta')
    with open(gene_file_name, 'w') as f:
        gene_name = ''
        for gene in BioFeature.objects.using(compendium.compendium_nick_name).all():
            try:
                gene_name = gene.name
                sequence = gene.biofeaturevalues_set.filter(
                    bio_feature_field__name='sequence')[0].value
                f.write('>' + str(gene.id) + '\n' + str(sequence) + '\n')
            except Exception as e:
                report += 'No sequence found for ' + gene_name + '<br>'
    # create probe fasta file
    probe_file_name = os.path.join(plt_dir, task_id + '_probe.fasta')
    with open(probe_file_name, 'w') as f:
        probe_name = ''
        for probe in BioFeatureReporter.objects.using(compendium.compendium_nick_name). \
                filter(platform_id=platform_id).all():
            try:
                probe_name = probe.name
                sequence = probe.biofeaturereportervalues_set.filter(
                    bio_feature_reporter_field__name='sequence')[0].value
                f.write('>' + str(probe.id) + '\n' + str(sequence) + '\n')
            except Exception as e:
                report += 'No sequence found for ' + probe_name + '<br>'
    # create blast db
    cmd = os.path.join(settings.BASE_DIR, 'command', 'external_programs', 'ncbi-blast', 'bin')
    cmd += '/makeblastdb -dbtype nucl -in ' + gene_file_name
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    (out, err) = process.communicate()
    # do alignment
    cmd = os.path.join(settings.BASE_DIR, 'command', 'external_programs', 'ncbi-blast', 'bin')
    os.environ["PATH"] += os.pathsep + cmd
    outfmt = "'6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen'"
    blastn_cline = NcbiblastnCommandline(query=probe_file_name, db=gene_file_name,
                                         perc_identity=alignment_identity / 100.0,
                                         outfmt=outfmt, out=blast_file_name)
    if use_short_blastn:
        blastn_cline = NcbiblastnCommandline(query=probe_file_name, db=gene_file_name,
                                             perc_identity=alignment_identity / 100.0,
                                             task='blastn-short', outfmt=outfmt,
                                             out=blast_file_name)
    (out, err) = blastn_cline()
    # create stats db
    mapper.create_db(alignment_identity, use_short_blastn)
    os.chmod(mapper.blast_filename, 0o666)
    os.chmod(mapper.sqlite_filename, 0o666)
    return report
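# An aside on the makeblastdb call above: string concatenation with shell=True
# breaks on paths containing spaces. A hedged, behavior-equivalent sketch using
# an argument list instead (not how the original code does it):
#
#     makeblastdb = os.path.join(settings.BASE_DIR, 'command', 'external_programs',
#                                'ncbi-blast', 'bin', 'makeblastdb')
#     process = subprocess.Popen([makeblastdb, '-dbtype', 'nucl', '-in', gene_file_name],
#                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#     out, err = process.communicate()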
def run_file_assignment_script(self, user_id, compendium_id, exp_id, script_filename,
                               parameters, input_files, experiment_entity, platform_entity,
                               sample_entity, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(exp_id)
    exp = Experiment.objects.using(compendium.compendium_nick_name).get(id=exp_id)
    base_path = os.path.join(os.path.dirname(parsing_scripts.__file__), 'file_assignment')
    script = os.path.join(base_path, script_filename)
    buffer = StringIO()
    message = ""
    if os.path.isfile(script) and script.endswith('.py'):
        context = {
            'PARAMETERS': parameters.split(','),
            'INPUT_FILES': input_files
        }
        sys.stdout = buffer
        script_dir = os.path.dirname(script)
        util_dir = os.path.join(os.path.dirname(script_dir), 'utils')
        sys.path.append(script_dir)
        sys.path.append(util_dir)
        exec(open(script).read(), context)
        assign_function = context['assign']
        ready_status = Status.objects.using(
            compendium.compendium_nick_name).get(name='entity_script_ready')
        message = "Assigned "
        with transaction.atomic(using=compendium.compendium_nick_name):
            # experiment-level files
            files = assign_function(context['INPUT_FILES'], exp.to_dict(),
                                    'experiment', context['PARAMETERS'])
            file_counter = 0
            for file in files:
                if not experiment_entity[0]:
                    continue
                file_counter += 1
                assigned_file = AssignedFile()
                assigned_file.script_name = experiment_entity[0]
                assigned_file.order = experiment_entity[1]
                assigned_file.parameters = experiment_entity[2]
                assigned_file.input_filename = os.path.basename(file)
                assigned_file.entity_type = 'EXP'
                assigned_file.experiment = exp
                assigned_file.status = ready_status
                assigned_file.save(using=compendium.compendium_nick_name)
            message += str(file_counter) + " files to EXPERIMENT, "
            # sample-level files (collect platforms along the way)
            platforms = set()
            file_counter = 0
            for sample in Sample.objects.using(
                    compendium.compendium_nick_name).filter(experiment=exp):
                platforms.add(sample.platform)
                files = assign_function(context['INPUT_FILES'], sample.to_dict(),
                                        'sample', context['PARAMETERS'])
                for file in files:
                    if not sample_entity[0]:
                        continue
                    file_counter += 1
                    assigned_file = AssignedFile()
                    assigned_file.script_name = sample_entity[0]
                    assigned_file.order = sample_entity[1]
                    assigned_file.parameters = sample_entity[2]
                    assigned_file.input_filename = os.path.basename(file)
                    assigned_file.entity_type = 'SMP'
                    assigned_file.sample = sample
                    assigned_file.status = ready_status
                    assigned_file.save(using=compendium.compendium_nick_name)
            message += str(file_counter) + " files to SAMPLES, "
            # platform-level files
            file_counter = 0
            for platform in platforms:
                files = assign_function(context['INPUT_FILES'], platform.to_dict(),
                                        'platform', context['PARAMETERS'])
                for file in files:
                    if not platform_entity[0]:
                        continue
                    file_counter += 1
                    assigned_file = AssignedFile()
                    assigned_file.script_name = platform_entity[0]
                    assigned_file.order = platform_entity[1]
                    assigned_file.parameters = platform_entity[2]
                    assigned_file.input_filename = os.path.basename(file)
                    assigned_file.entity_type = 'PLT'
                    assigned_file.platform = platform
                    assigned_file.status = ready_status
                    assigned_file.save(using=compendium.compendium_nick_name)
            message += str(file_counter) + " files to PLATFORMS"
        sys.stdout = sys.__stdout__
    return message, buffer.getvalue()
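# A file-assignment script must define an 'assign' function, which
# run_file_assignment_script pulls out of the exec context. Its contract,
# inferred from the calls above: assign(input_files, entity_dict, entity_type,
# parameters) returns the list of files belonging to that entity. A minimal
# hypothetical example:
#
#     # example_assign.py (hypothetical file)
#     def assign(input_files, entity, entity_type, parameters):
#         # naive rule: a file belongs to an entity whose name appears in its path
#         name = entity.get('sample_name') or entity.get('experiment_name') or ''
#         return [f for f in input_files if name and name in f]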
def init_compendium(request, *args, **kwargs):
    # delegates to the init_compendium utility module (assumed to share the
    # name of this view in the original import)
    init_compendium.init_compendium(request.POST['values'])
    init_database_connections()
    msg = Message(type='info', title='Compendium initialized',
                  message='Compendium successfully initialized')
    return HttpResponse(json.dumps(msg.to_dict()), content_type="application/json")
def read_experiments_channel(channel_name, view, request, user):
    channel = Channel(channel_name)
    start = 0
    end = None
    compendium = CompendiumDatabase.objects.get(id=request['compendium_id'])
    # task_running = False
    operation = 'search_experiment_public_db'
    if request['page_size']:
        start = (request['page'] - 1) * request['page_size']
        end = start + request['page_size']
    # try:
    #     task_id = ViewTask.objects.using(compendium.compendium_nick_name).get(
    #         view=request['view'], operation=operation)
    #     task = AsyncResult(task_id.task_id)
    #     task_running = not task.ready()
    # except Exception as e:
    #     pass
    order = ''
    if request['ordering'] == 'DESC':
        order = '-'
    exp_ids = set()
    # experiment
    init_database_connections()
    query_response = Experiment.objects.using(compendium.compendium_nick_name). \
        filter(Q(organism__icontains=request['filter']) |
               Q(experiment_access_id__icontains=request['filter']) |
               Q(scientific_paper_ref__icontains=request['filter']) |
               Q(description__icontains=request['filter']) |
               Q(experiment_name__icontains=request['filter'])).values_list('id', flat=True)
    exp_ids.update(query_response)
    # sample number
    query_response = Sample.objects.using(compendium.compendium_nick_name). \
        all().values('experiment').annotate(total=Count('id'))
    exp_ids.update([resp['experiment'] for resp in query_response
                    if request['filter'] in str(resp['total'])])
    # platform access id
    platform_ids = Platform.objects.using(compendium.compendium_nick_name). \
        filter(Q(platform_access_id__icontains=request['filter'])).values_list('id', flat=True)
    query_response = Sample.objects.using(compendium.compendium_nick_name). \
        filter(platform_id__in=platform_ids).values_list('experiment_id', flat=True)
    exp_ids.update(query_response)
    ordered = False
    query_response = Experiment.objects.using(compendium.compendium_nick_name). \
        filter(id__in=exp_ids)
    try:
        # probe the ordering field; fall back to in-memory sorting if it fails
        query_response.order_by(order + request['ordering_value'])[0]
        query_response = query_response.order_by(order + request['ordering_value'])
        ordered = True
    except Exception as e:
        pass
    total = query_response.count()
    query_response = query_response[start:end]
    experiments = []
    for exp in query_response:
        e = exp.to_dict()
        e['status_description'] = exp.status.description
        if exp.status.name == 'experiment_excluded':
            e['description'] = 'EXCLUDED: ' + exp.comments + '. ' + e['description']
        module_name, class_name = '.'.join(exp.data_source.python_class.split('.')[:-1]), \
            exp.data_source.python_class.split('.')[-1]
        python_class = getattr(importlib.import_module(module_name), class_name)()
        e['experiment_accession_base_link'] = python_class.experiment_accession_base_link
        e['platform_accession_base_link'] = python_class.platform_accession_base_link
        e['scientific_paper_accession_base_link'] = python_class.scientific_paper_accession_base_link
        e['platforms'] = [plt.to_dict() for plt in Platform.objects.using(
            compendium.compendium_nick_name).distinct().filter(
                pk__in=set(exp.sample_set.values_list('platform_id', flat=True)))]
        e['n_samples'] = exp.sample_set.count()
        experiments.append(e)
    reverse = order == '-'
    if not ordered:
        if request['ordering_value'] == 'platforms':
            experiments.sort(reverse=reverse,
                             key=lambda x: ','.join([p['platform_access_id']
                                                     for p in x[request['ordering_value']]]))
        else:
            experiments.sort(reverse=reverse, key=lambda x: x[request['ordering_value']])
    channel.send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': request,
                'data': {
                    'experiments': experiments,
                    # 'task_running': task_running,
                    'total': total
                }
            }
        })
    })
def experiment_public_download(self, user_id, compendium_id, experiment_id, channel_name, view, operation):
    init_database_connections()
    channel = Channel(channel_name)
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    operation = operation + "_" + str(experiment_id)
    downloading_status = Status.objects.using(
        compendium.compendium_nick_name).get(name='experiment_downloading')
    exp = ExperimentSearchResult.objects.using(
        compendium.compendium_nick_name).get(id=experiment_id)
    exp.status = downloading_status
    exp.save(using=compendium.compendium_nick_name)
    try:
        ViewTask.objects.using(compendium.compendium_nick_name). \
            get(operation=operation, view=view).delete()
    except Exception as e:
        pass
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)
    data_ready_status = Status.objects.using(
        compendium.compendium_nick_name).get(name='experiment_data_ready')
    base_output_directory = AdminOptions.objects.get(option_name='download_directory')
    exp = ExperimentSearchResult.objects.using(
        compendium.compendium_nick_name).get(id=experiment_id)
    out_dir = os.path.join(base_output_directory.option_value,
                           compendium.compendium_nick_name, exp.experiment_access_id)
    os.makedirs(out_dir, exist_ok=True)
    Group("compendium_" + str(compendium_id)).send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    log_message = ''
    module_name, class_name = '.'.join(exp.data_source.python_class.split('.')[:-1]), \
        exp.data_source.python_class.split('.')[-1]
    python_class = getattr(importlib.import_module(module_name), class_name)()
    python_class.download_experiment_files(exp.experiment_access_id, user.email, out_dir)
    try:
        Experiment.objects.using(compendium.compendium_nick_name).get(
            experiment_access_id=exp.experiment_access_id)
        message = Message(
            type='info', title='Experiment already exists',
            message='The experiment ' + exp.experiment_access_id +
                    ' is already present in the database. Data have been downloaded anyway.')
        message.send_to(channel)
    except Exception as e:
        log_message = python_class.create_experiment_structure(
            compendium_id, experiment_id, out_dir)
    exp.status = data_ready_status
    exp.save(using=compendium.compendium_nick_name)
    return log_message
def export_raw_data(self, user_id, compendium_id, path, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    os.makedirs(path, exist_ok=True)
    millis = int(round(time.time() * 1000))
    base_dir = AdminOptions.objects.get(option_name='raw_data_directory').option_value
    file_path_hdf5 = 'export_data_' + str(task_id) + '_' + str(millis) + '.hdf5'
    file_path_tsv = 'export_data_' + str(task_id) + '_' + str(millis) + '.tsv'
    file_path_gz = 'export_data_' + str(task_id) + '_' + str(millis) + '.tsv.gz'
    full_path_hdf5 = os.path.join(path, file_path_hdf5)
    full_path_tsv = os.path.join(path, file_path_tsv)
    full_path_gz = os.path.join(path, file_path_gz)
    # clean up leftovers from previous exports
    try:
        for fl in glob.glob(path + '/*.tsv'):
            os.remove(fl)
        for fl in glob.glob(path + '/*.hdf5'):
            os.remove(fl)
        for fl in glob.glob(path + '/*.gz'):
            os.remove(fl)
    except Exception as e:
        pass
    store = pd.HDFStore(full_path_hdf5)
    header = Sample.objects.using(compendium.compendium_nick_name). \
        order_by('platform', 'experiment').values('id', 'sample_name')
    bio_features = BioFeature.objects.using(compendium.compendium_nick_name). \
        order_by('name')
    bio_feature_name = compendium.compendium_type.bio_feature_name
    reporter_name = 'reporter ({})'.format(','.join([
        plt.platform_type.description
        for plt in Platform.objects.using(compendium.compendium_nick_name).all()
        if plt.platform_type
    ]))
    columns = [bio_feature_name, reporter_name] + ['Platform', 'Platform type'] + \
              [s['sample_name'] for s in header]
    max_sample_name = max([len(s['sample_name']) for s in header])
    min_size = {s['sample_name']: max_sample_name for s in header}
    min_size['Platform'] = max([
        len(plt.platform_access_id)
        for plt in Platform.objects.using(compendium.compendium_nick_name)
    ])
    min_size['Platform type'] = max([
        len(plt.description)
        for plt in PlatformType.objects.using(compendium.compendium_nick_name)
    ])
    df = pd.DataFrame(columns=columns)
    store.put('raw_data', df, format='table', data_columns=True, min_itemsize=min_size)
    line_number = 50000
    batch_size = int((line_number * bio_features.count()) /
                     BioFeatureReporter.objects.using(compendium.compendium_nick_name).count())
    for start, end, total, qs in batch_qs(bio_features, batch_size=batch_size):
        bfr = {(bf.id, bf.name): list(
            bf.biofeaturereporter_set.order_by('platform').values_list(
                'id', 'name', 'platform__platform_access_id',
                'platform__platform_type__description'))
            for bf in qs}
        bf_name_len = 15
        rep_name_len = 15
        data = [
            [],  # bio_feature names
            [],  # reporter ids (replaced with names below)
            [],  # platforms
            []   # platform types
        ]
        for k, v in bfr.items():
            for r in v:
                data[0].append(k[1])  # bio_feature name
                data[1].append(r[0])  # reporter id
                data[2].append(r[2])  # platform access id
                data[3].append(r[3])  # platform type description
                bf_name_len = max(bf_name_len, len(k[1]))
                rep_name_len = max(rep_name_len, len(r[1]))
        min_size[bio_feature_name] = bf_name_len
        min_size[reporter_name] = rep_name_len
        for sample in header:
            rd = {
                rdv['bio_feature_reporter_id']: rdv['value']
                for rdv in RawData.objects.using(compendium.compendium_nick_name).filter(
                    sample__id=sample['id'],
                    bio_feature_reporter_id__in=data[1]).values(
                        'bio_feature_reporter_id', 'value')
            }
            data.append([rd.get(r, np.nan) for r in data[1]])
        reporters_map = dict([y[:2] for x in bfr.values() for y in x])
        data[1] = [reporters_map[i] for i in data[1]]  # use name instead of id for reporters
        store.append('raw_data',
                     pd.DataFrame(np.array(data).T, columns=columns),
                     format='table', data_columns=True, min_itemsize=min_size)
    store.close()
    # stream the HDF5 table out as TSV in chunks, then gzip it
    header_flag = True
    with open(full_path_tsv, 'a') as f:
        for df in pd.read_hdf(full_path_hdf5, chunksize=line_number):
            df.to_csv(f, header=header_flag, sep='\t', index=False)
            header_flag = False
    compress_gz(full_path_tsv, full_path_gz)
    return full_path_gz.replace(base_dir, '')
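# 'batch_qs' (used above) is assumed to be a helper that slices a queryset into
# fixed-size windows. A sketch of such a helper, named batch_qs_sketch here so
# it does not shadow whatever the module actually imports:
def batch_qs_sketch(qs, batch_size=1000):
    """Yield (start, end, total, slice) tuples over a queryset (sketch)."""
    total = qs.count()
    for start in range(0, total, batch_size):
        end = min(start + batch_size, total)
        yield start, end, total, qs[start:end]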
def import_platform_mapping(self, user_id, compendium_id, plt_dir, platform_id, filter_id,
                            blast_file_name, channel_name, view, operation):
    init_database_connections()
    user = User.objects.get(id=user_id)
    compendium = CompendiumDatabase.objects.get(id=compendium_id)
    task_id = self.request.id
    try:
        ViewTask.objects.using(compendium.compendium_nick_name). \
            get(view=view, operation=operation).delete()
    except Exception as e:
        pass
    channel_task = ViewTask(task_id=task_id, operation=operation, view=view)
    channel_task.save(using=compendium.compendium_nick_name)
    mapper = MicroarrayMapper(os.path.join(plt_dir, blast_file_name))
    mapper.set_filter_status(filter_id, 'running')
    Group("compendium_" + str(compendium_id)).send({
        'text': json.dumps({
            'stream': view,
            'payload': {
                'request': {'operation': 'refresh'},
                'data': None
            }
        })
    })
    chunk_size = 30000
    start = 0
    total = mapper.get_filter_result_count()
    bio_feature_reporter_mapping_csv = os.path.join(
        plt_dir, blast_file_name) + '_bio_feature_reporter_mapping.csv'
    try:
        os.remove(bio_feature_reporter_mapping_csv)
    except Exception as e:
        pass
    with transaction.atomic(using=compendium.compendium_nick_name):
        # snapshot the current mapping state before wiping it
        without_mapping_before = set(
            BioFeatureReporter.objects.using(compendium.compendium_nick_name).filter(
                platform_id=platform_id, bio_feature__isnull=True).values_list(
                    'id', flat=True).distinct())
        with_mapping_before = dict(
            BioFeatureReporter.objects.using(compendium.compendium_nick_name).filter(
                platform_id=platform_id, bio_feature__isnull=False).values_list(
                    'id', 'bio_feature_id'))
        BioFeatureReporter.objects.using(compendium.compendium_nick_name). \
            filter(platform_id=platform_id).update(bio_feature=None)
        for chunk_num in range(int(math.ceil(total / chunk_size))):
            chunk = mapper.get_filter_result_dict(filter_id, start, start + chunk_size,
                                                  self.is_aborted)
            if self.is_aborted():
                raise DatabaseError('Operation aborted by user')
            for rep_id, feat_id in chunk.items():
                BioFeatureReporter.objects.using(compendium.compendium_nick_name). \
                    filter(id=rep_id).update(bio_feature_id=feat_id)
            start += chunk_size
    return bio_feature_reporter_mapping_csv, list(without_mapping_before), with_mapping_before
def init_parsing(db_id, exp_id, get_name_only=False):
    init_database_connections()
    compendium = CompendiumDatabase.objects.get(id=db_id)
    experiment = Experiment.objects.using(compendium.compendium_nick_name).get(id=exp_id)
    base_dir = AdminOptions.objects.get(option_name='raw_data_directory')
    out_dir = os.path.join(base_dir.option_value, compendium.compendium_nick_name,
                           experiment.experiment_access_id)
    os.makedirs(out_dir, exist_ok=True)
    key = os.path.join(out_dir, experiment.experiment_access_id + '.sqlite')
    if get_name_only:
        return key
    # register the per-experiment sqlite database under the 'key' alias
    value = {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': key,
        'PARSING': True
    }
    connections.databases[key] = value
    try:
        exp = ParsingExperiment.objects.using(key).all()[0]
    except Exception as e:
        call_command('migrate', database=key)
        module_name, class_name = '.'.join(experiment.data_source.python_class.split('.')[:-1]), \
            experiment.data_source.python_class.split('.')[-1]
        python_class = getattr(importlib.import_module(module_name), class_name)()
        exp = ParsingExperiment()
        exp.organism = experiment.organism
        exp.experiment_access_id = experiment.experiment_access_id
        exp.description = experiment.description
        exp.experiment_name = experiment.experiment_name
        if experiment.scientific_paper_ref:
            exp.scientific_paper_ref = python_class.scientific_paper_accession_base_link + \
                experiment.scientific_paper_ref
        exp.experiment_fk = experiment.id
        exp.save(using=key)
    try:
        ParsingPlatform.objects.using(key).all()[0]
    except Exception as e:
        platform_ids = list(set([sample.platform_id for sample in experiment.sample_set.all()
                                 if sample.platform_id]))
        for platform_id in platform_ids:
            platform = Platform.objects.using(compendium.compendium_nick_name).get(id=platform_id)
            plt = ParsingPlatform()
            plt.platform_access_id = platform.platform_access_id
            plt.platform_name = platform.platform_name
            plt.description = platform.description
            plt.platform_type = platform.platform_type.name if platform.platform_type else None
            plt.platform_fk = platform_id
            plt.reporter_platform = platform_id
            plt.reporter_platform_imported = platform.biofeaturereporter_set.count() > 0
            plt.save(using=key)
    try:
        ParsingSample.objects.using(key).all()[0]
    except Exception as e:
        for sample in experiment.sample_set.all():
            plt = ParsingPlatform.objects.using(key).get(platform_fk=sample.platform_id)
            smp = ParsingSample()
            smp.sample_name = sample.sample_name
            smp.description = sample.description
            smp.experiment = exp
            smp.platform = plt
            smp.sample_fk = sample.id
            smp.reporter_platform = plt.reporter_platform
            smp.reporter_platform_imported = plt.reporter_platform_imported
            smp.save(using=key)
    return key
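# Usage sketch: the returned key doubles as the sqlite file path and the Django
# database alias registered in connections.databases, so the parsing models can
# be queried through it (argument values below are hypothetical):
#
#     key = init_parsing(db_id=1, exp_id=42)
#     samples = ParsingSample.objects.using(key).all()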