def import_definitions(): import DataDefinitions with open('data/definitions.json', 'r') as fd: json_data = fd.read().decode('utf-8') DataDefinitions.import_definitions_into_db(json_data, True) print 'done'
def test_generate_paths_apriori_mp():
    """Rebuild apriori paths on temp data via the test path generator.

    Progress is reported through ProcessTracking; on any failure the
    traceback is recorded and the exception is re-raised.
    """
    import DataDefinitions
    ProcessTracking.process_start('generate_paths_apriori')
    run_on_temp_data()
    try:
        Path.drop()
        Path.db_setup()
        cfg = Settings.load_dict([
            'path_min_support', 'path_min_confidence', 'path_max_k',
            'path_min_students'
        ])
        # Path.generate_paths(DataDefinitions.elements)
        Path.test_generate_paths_with_generator(
            DataDefinitions.get_elements(),
            {'finished': True, 'ignore': False, 'stg': 'DTB'},
            True,
            cfg['path_min_support'],
            cfg['path_min_confidence'],
            cfg['path_max_k'],
            cfg['path_min_students'])
        ProcessTracking.process_done('generate_paths_apriori')
    except:
        # Bare except is intentional: even interrupts get recorded as a
        # failed process before the exception propagates.
        ProcessTracking.process_failed('generate_paths_apriori',
                                       {'error': traceback.format_exc()})
        raise
def get_queries():
    """Return every known query serialized as {name: query_dict}."""
    return {
        name: query.get_dict()
        for name, query in DataDefinitions.get_queries().iteritems()
    }
def get_definitions(): import DataDefinitions hashes = set() for path_el in DataDefinitions.get_elements(): if path_el.md5_id() in hashes: print path_el.md5_id(), 'exists' else: print path_el.md5_id() hashes.add(path_el.md5_id()) print path_el.get_dict() el_count = len(DataDefinitions.get_elements()) hashes_count = len(hashes) # print el_count, math.factorial(el_count)/math.factorial(el_count-3)/math.factorial(2-1) print el_count, hashes_count, math.factorial(el_count) / ( math.factorial(el_count - 3) * math.factorial(3))
def get_queries(settings):
    """Serialize the queries visible to the current user, keyed by md5 id.

    The 'stg' query is suppressed when the user is restricted to exactly
    one study programme. '{cp_label}' placeholders are substituted from
    settings['cp_label'].
    """
    allowed_stgs = UserTools.get_allowed_stgs(g.user)
    # With exactly one allowed stg there is nothing to choose — hide 'stg'.
    suppress_stg = allowed_stgs is not None and len(allowed_stgs) == 1
    filters = dict()
    for name, query in DataDefinitions.get_queries().iteritems():
        if suppress_stg and query.q == 'stg':
            continue
        filters[query.md5_id()] = query.get_dict(
            replace_vars={'{cp_label}': settings['cp_label']})
    return filters
def generate_paths_apriori():
    """Regenerate all apriori paths on temp data from scratch.

    Drops and re-creates the Path collection, then runs the generator over
    all elements; failures are recorded via ProcessTracking and re-raised.
    """
    import DataDefinitions
    ProcessTracking.process_start('generate_paths_apriori')
    run_on_temp_data()
    try:
        Path.drop()
        Path.db_setup()
        # Path.generate_paths(DataDefinitions.elements)
        Path.generate_paths_with_generator(DataDefinitions.get_elements())
    except:
        # Record the failure (including interrupts) before propagating.
        ProcessTracking.process_failed('generate_paths_apriori',
                                       {'error': traceback.format_exc()})
        raise
    else:
        ProcessTracking.process_done('generate_paths_apriori')
def generate_definitions():
    """ Generated definitions

    Generates the definitions in the DB, reloads them from there, and
    persists them to meta data. Progress is tracked via ProcessTracking.
    """
    import DataDefinitions
    ProcessTracking.process_start('generate_definitions')
    # DataDefinitions.create_definitions()
    DataDefinitions.generate_definitions_in_db()
    # reload so the in-memory state matches what was just written
    DataDefinitions.load_definitions_from_db()
    DataDefinitions.save_definitions_to_meta_data()
    ProcessTracking.process_done('generate_definitions')
def test_definitions_db():
    """Round-trip check: DB -> meta data -> reload, printing progress.

    Loads definitions from the DB, saves them to meta data, inspects the
    stored 'definitionsDate' meta record, then reloads from meta data.
    """
    import DataDefinitions
    run_on_temp_data()
    DataDefinitions.load_definitions_from_db()
    print 'loaded'
    DataDefinitions.save_definitions_to_meta_data()
    print 'saved'
    md = MetaData.find_by_id('definitionsDate')
    print type(md.data), md.data
    DataDefinitions.load_definitions_from_meta_data()
    print 'loaded'
def permutations(): import DataDefinitions elements = DataDefinitions.get_elements()[0:91] k = 7 # elements = elements comb_count = math.factorial(len(elements)) / \ (math.factorial(len(elements) - k) * math.factorial(k)) index = 0L start = time.time() for els in itertools.combinations(elements, k): if index % 100000 == 0: print index, '/', comb_count, 'time:', time.time() - start index += 1 print index, '/', comb_count, 'time:', time.time() - start
def check_path_counts():
    """Cross-check stored path counts against both the bitarray and the DB.

    For every stored Path, recomputes its 'count' (students matching the
    filter elements) and 'matched' (students additionally matching the
    path elements) via a student bitarray and via direct DB queries, and
    prints any path whose stored numbers disagree with the DB results.
    """
    import DataDefinitions
    run_on_temp_data()
    all_paths = Path.find({})
    # elements used to build the bitarray: skip ignored queries and the
    # 'success' condition
    elements = [
        pe for pe in DataDefinitions.get_elements()
        if not pe.query.ignore and pe.condition.name != 'success'
    ]
    ba = Student.get_students_bitarray(elements, {
        'ignore': False,
        'finished': True
    })
    for path in all_paths:
        db_query = {'ignore': False, 'finished': True}
        all_pe = []
        # first accumulate only the filter elements into db_query
        for pe in path.filter_elements:
            pe.get_db_query(db_query)
            all_pe.append(pe)
        # snapshot before the path elements are merged in below
        db_query_count = db_query.copy()
        path_count_ba = ba.count_matching(path.filter_elements)
        path_count = Student.find(db_query_count).count()
        # now extend db_query with the path elements as well
        for pe in path.elements:
            pe.get_db_query(db_query)
            all_pe.append(pe)
        path_matched_ba = ba.count_matching(all_pe)
        path_matched = Student.find(db_query).count()
        # only mismatches against the DB-computed numbers are reported
        if path_count != path.count or path_matched != path.matched:
            print 'path ba:', path_matched_ba, '/', path_count_ba, ' db:', path_matched, '/', path_count, '!=', path.get_str(
            )
            print '\t count', db_query_count
            print '\t matched', db_query
def find_path_elements(): import DataDefinitions for pe in DataDefinitions.get_elements_by_query(Query('success')): print hash(pe) print pe.get_dict()
def export_definitions(): import DataDefinitions json = DataDefinitions.export_definitions_from_db() with open('data/definitions.json', 'w') as fd: fd.write(json.encode('utf-8')) print 'done'
def save_defintions_to_db():
    """Persist the current definitions to the DB.

    NOTE(review): the "defintions" typo is kept deliberately — it mirrors
    the DataDefinitions.save_defintions_to_db API name; renaming would
    break existing callers.
    """
    import DataDefinitions
    DataDefinitions.save_defintions_to_db()
    print 'Done'
def is_field_allowed(field, user_role, query_types):
    """Return True when ``field`` is a known query field and the role may
    access it on students.

    :param field: field name from the request
    :param user_role: role of the requesting user
    :param query_types: extra field -> type mapping accepted beside the
        DataDefinitions queries
    """
    is_known = field in DataDefinitions.get_queries() or field in query_types
    # short-circuits: the per-role check only runs for known fields
    return is_known and DB.Student.is_field_allowed(field, user_role)
def handle():
    """Main student-list endpoint: filter, sort, group and page students.

    Reads everything from the Flask-style ``request.args`` / ``g`` context
    and answers (via ``respond``/``respond_csv``) with either a student
    page, group/aggregation results, sums/averages, or CSV output.
    Early returns carry error payloads with 4xx/5xx status codes.
    """
    # if request.method == 'POST':
    #     name = request.form['name']
    status = 200
    # request-arg name -> value type used by DB.get_db_query_by_type
    query_types = {
        'ident': 'int',
        'status': 'int',
        'risk.mean': 'float',
        'risk.avg': 'float',
        'risk.median': 'float',
        'risk.median_scaled': 'float',
        'risk_all.median_scaled': 'float',
        'risk_stg.median_scaled': 'float',
        'risk_degree.median_scaled': 'float',
        'risk.q25': 'float',
        'risk.q75': 'float'
    }
    # fields accepted for sort1/sort2: all Student attributes plus
    # computed/nested fields
    sortable = DB.Student().__dict__.keys()
    sortable.extend([
        '_id', 'risk.median', 'risk.median_scaled', 'risk_all.median_scaled',
        'risk_stg.median_scaled', 'risk_degree.median_scaled', 'risk.mean',
        'risk.q25', 'risk.q75', 'semester_data.sem_1.bonus_total',
        'semester_data.sem_1.grade', 'semester_data.sem_1.delayed',
        'semester_data.sem_1.failed', 'semester_data.sem_1.successful',
        'semester_data.sem_1.count_KL', 'semester_data.sem_2.bonus_total',
        'semester_data.sem_2.grade', 'semester_data.sem_2.delayed',
        'semester_data.sem_2.failed', 'semester_data.sem_2.successful',
        'semester_data.sem_2.count_KL', 'semester_data.sem_3.bonus_total',
        'semester_data.sem_3.grade', 'semester_data.sem_3.delayed',
        'semester_data.sem_3.failed', 'semester_data.sem_3.successful',
        'semester_data.sem_3.count_KL'
    ])
    limit = request.args.get('limit', default=20, type=int)
    start = request.args.get('start', default=0, type=int)
    # sort parameters come as "field,direction"
    sort1 = request.args.get('sort1', default='_id,-1').split(',')
    sort2 = request.args.get('sort2', default='').split(',')
    with_definitions = request.args.get('definitions', default='false')
    mlist = request.args.get('mlist', default=None)
    do_calc = request.args.get('do_calc', default=None)
    groups = request.args.get('groups', default=None)
    single_groups = request.args.get('single_groups', default=None)
    calculations = request.args.get('calculations', default=None)
    is_csv = request.args.get('output', default='json') == 'csv'
    user_role = g.user_role
    ret = {
        'start': start,
        'limit': limit,
        'count': 0,
        'list': None,
        'query': None,
        'sort': None
    }
    if with_definitions == 'true':
        ret['definitions'] = get_definitions()
    db_query = dict()
    db_queries = []  # for restrictions
    db_sort = []
    # only accept well-formed "field,direction" pairs on sortable fields
    if len(sort1) == 2 and sort1[0] in sortable:
        db_sort.append((sort1[0], int(sort1[1])))
    if len(sort2) == 2 and sort2[0] in sortable:
        db_sort.append((sort2[0], int(sort2[1])))
    settings = DB.Settings.load_dict([
        'lights', 'main_risk_group', 'hide_finished_ident_data',
        'hide_finished_after_days', 'student_ident_string'
    ])
    # filter by MarkedList
    if mlist is not None:
        ml = DB.MarkedList.find_one({'_id': mlist})
        if ml is not None and ml.is_allowed(g.username, user_role):
            student_ids = list(ml.list)
            if not settings['student_ident_string']:
                # idents are stored as ints unless configured as strings
                student_ids = [int(x) for x in student_ids]
            db_query['_id'] = {'$in': student_ids}
            ret['mlist'] = ml.get_dict()
            ret['mlist']['is_writable'] = ml.is_writable(g.username, user_role)
        else:
            return respond({'error': 'invalid_mlist'}, 400)
    filter_elements = request.args.get('elements', default=None)
    if filter_elements is not None:
        for fe_id in filter_elements.split(','):
            fe = DataDefinitions.get_element_by_hash(long(fe_id))
            if fe is not None:
                fe.get_db_query(
                    db_query
                )  # apply condition from filter element to db_query
    if settings['student_ident_string']:
        query_types['ident'] = 'str'
    # translate traffic-light filters into median_scaled ranges
    for name in ['risk', 'risk_all', 'risk_stg', 'risk_degree']:
        if name in request.args:
            if request.args[name] == 'green':
                db_query[name + '.median_scaled'] = {
                    '$lt': settings['lights'][1]
                }
            if request.args[name] == 'yellow':
                db_query[name + '.median_scaled'] = {
                    '$gte': settings['lights'][1],
                    '$lt': settings['lights'][2]
                }
            if request.args[name] == 'red':
                db_query[name + '.median_scaled'] = {
                    '$gte': settings['lights'][2]
                }
    # generic filters: typed fields, tags, ident lists, then named queries
    for name in request.args:
        if name in query_types:
            dbfield = name
            if name == 'ident':
                dbfield = '_id'
            try:
                value = request.args.get(name)
                db_query[dbfield] = DB.get_db_query_by_type(
                    value, query_types[name])
                continue
            except ValueError:
                return respond({'error': 'invalid_filter', 'name': name}, 400)
        if name == 'tags':
            try:
                value = request.args.get(name).split(',')
                db_query[name] = {'$all': value}
                continue
            except ValueError:
                return respond({'error': 'invalid_filter', 'name': name}, 400)
        if name == 'idents':
            try:
                if settings['student_ident_string']:
                    value = request.args.get(name).split(',')
                    db_query['_id'] = {'$in': value}
                else:
                    db_query['_id'] = DB.get_db_query_by_type(
                        request.args.get(name), 'in_intlist')
                continue
            except ValueError:
                return respond({'error': 'invalid_filter', 'name': name}, 400)
        if name not in DataDefinitions.get_queries():
            continue
        query = DataDefinitions.get_queries()[name]
        try:
            value = request.args.get(name)
            db_query[name] = get_db_query(value, query)
        except ValueError:
            return respond({'error': 'invalid_filter', 'name': name}, 400)
    # role-based restrictions are ANDed with the user's query
    allowed_stgs = UserTools.get_allowed_stgs(g.user)
    if allowed_stgs:
        db_queries.append({'stg': {'$in': allowed_stgs}})
    if settings['hide_finished_after_days'] != -1:
        # hide students finished longer ago than the configured day count
        earliest = datetime.utcfromtimestamp(
            time.time() - float(settings['hide_finished_after_days']) * 86400)
        db_queries.append({
            '$or': [{
                'finished': True,
                'exm_date': {
                    '$gt': earliest
                }
            }, {
                'finished': False
            }]
        })
    if len(db_queries) > 0:
        db_queries.append(db_query)
        db_query = {'$and': db_queries}
    if groups is not None:
        if not UserTools.has_right('students_data',
                                   user_role) and not UserTools.has_right(
                                       'student_analytics', user_role):
            return respond({'error': 'no_rights'}, 403)
        allowed_groups = []
        if not isinstance(groups, unicode):
            return respond({'error': 'invalid_groups'}, 400)
        for name in groups.split(','):
            if not is_field_allowed(name, g.user_role, query_types):
                return respond({'error': 'invalid_group', 'name': name}, 400)
            allowed_groups.append(name)
        allowed_calculations = list()
        allowed_ops = ['sum', 'avg', 'max', 'min', 'addToSet']
        if isinstance(calculations, unicode):
            # each calculation comes as "op.field"
            for full_name in calculations.split(','):
                op, name = full_name.split('.', 1)
                if not is_field_allowed(name, g.user_role,
                                        query_types) or op not in allowed_ops:
                    continue
                allowed_calculations.append({'field': name, 'op': op})
        ret['groups'] = allowed_groups
        ret['calculations'] = allowed_calculations
        ret['group_results'] = DB.Student.calc_groups(allowed_groups, db_query,
                                                      allowed_calculations)
    elif single_groups is not None:
        if not UserTools.has_right('students_data',
                                   user_role) and not UserTools.has_right(
                                       'student_analytics', user_role):
            return respond({'error': 'no_rights'}, 403)
        allowed_groups = []
        if not isinstance(single_groups, unicode):
            return respond({'error': 'invalid_groups'}, 400)
        for name in single_groups.split(','):
            if not is_field_allowed(name, g.user_role, query_types):
                return respond({'error': 'invalid_group', 'name': name}, 400)
            allowed_groups.append(name)
        allowed_calculations = list()
        allowed_ops = ['sum', 'avg', 'max', 'min', 'addToSet']
        if isinstance(calculations, unicode):
            for full_name in calculations.split(','):
                op, name = full_name.split('.', 1)
                if not is_field_allowed(name, g.user_role,
                                        query_types) or op not in allowed_ops:
                    continue
                allowed_calculations.append({'field': name, 'op': op})
        ret['single_groups'] = allowed_groups
        ret['calculations'] = allowed_calculations
        ret['group_results'] = DB.Student.calc_single_groups(
            allowed_groups, db_query, allowed_calculations)
    elif do_calc == 'sums':
        if not UserTools.has_right('students_data',
                                   user_role) and not UserTools.has_right(
                                       'student_analytics', user_role):
            return respond({'error': 'no_rights'}, 403)
        ret['sums'] = DB.Student.calc_sums(db_query)
    elif do_calc == 'avgs':
        if not UserTools.has_right('students_data',
                                   user_role) and not UserTools.has_right(
                                       'student_analytics', user_role):
            return respond({'error': 'no_rights'}, 403)
        ret['avgs'] = None
        risk_values_allowed_key = 'risk_value_' + user_role
        settings = DB.Settings.load_dict([risk_values_allowed_key])
        avgs = DB.Student.calc_avgs(db_query)
        if avgs:
            ret['avgs'] = {}
            for key, value in avgs.iteritems():
                # drop risk averages when the role may not see risk values
                if 'risk' in key and not settings.get(risk_values_allowed_key,
                                                      True):
                    continue
                if key in DB.Student.restricted_fields:
                    # restricted fields require the mapped right
                    if UserTools.has_right(DB.Student.restricted_fields[key],
                                           user_role):
                        ret['avgs'][key] = value
                else:
                    ret['avgs'][key] = value
    elif is_csv:
        if not UserTools.has_right('students_data', user_role):
            return respond({'error': 'no_rights'}, 403)
        cursor = DB.Student.find(db_query, sort=db_sort)
        return respond_csv(cursor, ret)
    else:
        if not UserTools.has_right('students_data', user_role):
            return respond({'error': 'no_rights'}, 403)
        if not 1 <= limit <= 1000:
            return respond({'error': 'invalid_limit'}, 400)
        try:
            cursor = DB.Student.find(db_query,
                                     limit=limit,
                                     skip=start,
                                     sort=db_sort)
            ret['count'] = cursor.count()
            ret['list'] = [
                s.get_dict(user_role,
                           hide_finished_ident_data=settings[
                               'hide_finished_ident_data']) for s in cursor
            ]
        except DB.errors.OperationFailure as e:
            ret['count'] = 0
            ret['error'] = e.message
            status = 500
    # echo the effective query/sort for debugging on the client side
    ret['query'] = repr(db_query)
    ret['sort'] = repr(db_sort)
    return respond(ret, status)
def get_definitions():
    """Build the definitions payload for the current user.

    Returns a dict with the path elements and queries visible to the
    user's role, the fields restricted for that role, and the UI-related
    settings (risk groups, lights, hide flags, tags, ...).
    """
    role = g.user_role
    data = {
        'path_elements': {},
        # list of restricted fields for this role
        'restricted': [
            field
            for field, required in DB.Student.restricted_fields.iteritems()
            if not UserTools.has_right(required, role)
        ]
    }
    data['list_identification_data'] = UserTools.has_right(
        'list_identification_data', role)
    allowed_stgs = UserTools.get_allowed_stgs(g.user)
    for pe in DataDefinitions.get_elements():
        if allowed_stgs is not None:
            # skip stg_original elements whose mapped short name is not allowed
            if pe.query.q == 'stg_original' \
                    and DB.Course.get_mapped_short(
                        pe.condition.compare_value) not in allowed_stgs:
                continue
            # skip stg elements outside the allowed set, or entirely when
            # only a single stg is allowed
            if pe.query.q == 'stg' \
                    and (pe.condition.compare_value not in allowed_stgs
                         or len(allowed_stgs) == 1):
                continue
        data['path_elements'][pe.md5_id()] = pe.get_dict(query_id=True)
    last_date = DB.MetaData.find_by_id('lastDate')
    data['lastDate'] = last_date.data[
        'date'] if last_date is not None else None
    data['user_roles'] = UserTools.user_roles.keys()
    risk_values_allowed_key = 'risk_value_' + role
    settings = DB.Settings.load_dict([
        risk_values_allowed_key, 'generate_risk_group_all',
        'generate_risk_group_stg', 'generate_risk_group_degree',
        'main_risk_group', 'compare_averages', 'cp_label', 'hide_resigned',
        'hide_median_risk', 'hide_student_fields', 'hide_applicant_fields'
    ])
    data['queries'] = get_queries(settings)
    data['lights'] = DB.Settings.load_dict_for_key('lights')
    # settings forwarded to the client unchanged
    for key in ('generate_risk_group_all', 'generate_risk_group_stg',
                'generate_risk_group_degree', 'main_risk_group',
                'compare_averages', 'hide_resigned', 'hide_median_risk',
                'hide_student_fields', 'hide_applicant_fields'):
        data[key] = settings[key]
    data['risk_value_allowed'] = settings.get(risk_values_allowed_key, True)
    data['tags'] = [
        item.get_dict() for item in DB.Tag.find({}, sort=[['order', 1]])
    ]
    data['students_view'] = g.students_view
    return data