sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import * 


## Script specific 
import sys
import logging 
import collections
import datetime

# Bind the module-level ``log`` only when that name is not already defined
# in this namespace, so an existing binding is left untouched.
if "log" not in globals():
    log = Logger.init_logger('STATS_%s' % cfg.language_code, load_config())







def main():
	# mongodb stats can be obtained with cfg.db.command("collstats","dict")
	db_stats={
		'A/ report_date':datetime.datetime.now(),
		'B/ Number of samples':samples_col.count(),
		'C/ Number of normalized measures':measurements_col.count(),
		'C_a/ Tally of normalized measures':measurements_col.aggregate([{"$group":{"_id":"$type", "count": { "$sum": 1 }}}])['result'],
		'D/ Number of species':species_col.count(),
Esempio n. 2
0
#!/usr/bin/env python
# encoding: utf-8

import sys
sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import * 


# Script supposed to be run in the background to populate the DB with available datasets 
# Create this script's logger unless a ``logger`` is already bound in the
# module namespace. The guard has to test ``logger`` (the name that is
# assigned here and used by the statements below), not some other name:
# otherwise an unrelated pre-existing binding would skip the initialisation
# and leave ``logger`` undefined.
if "logger" not in globals():
    logger = Logger.init_logger('FLATTEN_%s' % cfg.language_code, load_config())



logger.info("Running %s", sys.argv[0])

logger.info("Flattening and normalizing experimental results")

# Distinct "xp" values currently stored in the measurements collection.
already_existing_xp = measurements_col.distinct("xp")

# Samples with at least one experimental result that has a "values" field.
samples_with_results = samples_col.find({
    "experimental_results": {
        "$elemMatch": {
            "values": {"$exists": True},
        },
    },
})
# a_sample=samples_with_results[0]

# Operation counter and unordered bulk operation on the measurements
# collection; presumably both are filled in by the loop that follows.
n_op = 0
measurements_to_insert = measurements_col.initialize_unordered_bulk_op()
for a_sample in samples_with_results:
	# i,experimental_results=enumerate(a_sample['experimental_results']).next()
	for i,experimental_results in enumerate(a_sample['experimental_results']):
#!/usr/bin/env python
# encoding: utf-8

import sys
sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import * 


# Script 
import datetime
# Bind the module-level ``log`` only when that name is not already defined
# in this namespace, so an existing binding is left untouched.
if "log" not in globals():
    log = Logger.init_logger('SAMPLE_DATA_%s' % cfg.language_code, load_config())

# Clear the db: call remove() with no arguments on every collection,
# one after the other, in the order listed.
for _collection in (
    species_col,
    publications_col,
    samples_col,
    mappings_col,
    measurements_col,
):
    _collection.remove()



#### Melon 


# species 
Esempio n. 4
0
sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import * 
from helpers.path import data_dir

# Script supposed to be run in the background to populate the DB with available datasets 
## Setup

from numbers import Number
import collections
from math import log
# Create this script's logger unless a ``logger`` is already bound in the
# module namespace. The guard must test ``logger`` rather than ``log``:
# ``log`` is always bound here by ``from math import log`` just above, so a
# guard on "log" could never be true and ``logger`` would never be
# initialised by this script.
if "logger" not in globals():
    logger = Logger.init_logger('DATA_PROCESSOR_%s' % cfg.language_code, load_config())

logger.info("Running %s", sys.argv[0])

# Get available datasets and insert them in the DB

# a_sample=samples_col.find_one({"experimental_results.values":{"$exists":False}})
# Select the samples with at least one experimental result that has no
# "values" field.
samples_to_process = samples_col.find({
    "experimental_results": {
        "$elemMatch": {
            "values": {"$exists": False},
        },
    },
})

logger.info("Found %d samples to process", samples_to_process.count())

for a_sample in samples_to_process:
	logger.info("Will process dataset for experiment %s",a_sample['name'])
	parser_config=a_sample['xls_parsing']
	for a_result_idx,a_result in [(i,x) for i,x in enumerate(a_sample['experimental_results']) if "values" not in x]:
		# specialize parser for the result 
Esempio n. 5
0
#!/usr/bin/env python
# encoding: utf-8

import sys
sys.path.append("..")
sys.path.append(".")
from config import *
from helpers.basics import load_config
from helpers.logger import Logger
from helpers.db_helpers import * 
from helpers.path import data_dir


# Script supposed to be run in the background to populate the DB with available datasets 
# Create this script's logger unless a ``logger`` is already bound in the
# module namespace. The guard has to test ``logger`` (the name that is
# assigned here and used by the statements below), not some other name:
# otherwise an unrelated pre-existing binding would skip the initialisation
# and leave ``logger`` undefined.
if "logger" not in globals():
    logger = Logger.init_logger('PROCESS_MAPPINGS_%s' % cfg.language_code, load_config())


# Script supposed to be run in the background to populate the DB with available datasets 




logger.info("Running %s", sys.argv[0])

# Get available mappings and process them: select the mapping documents
# that do not have a "src_to_tgt" field.
mappings_to_process = mappings_col.find({
    "src_to_tgt": {"$exists": False},
})

logger.info("Found %d mappings to process", mappings_to_process.count())