Beispiel #1
0
def import_definitions():
    """Import the definitions stored in ``data/definitions.json`` into the DB.

    The file content is decoded as UTF-8 and handed to
    ``DataDefinitions.import_definitions_into_db`` together with ``True`` as
    second argument; ``done`` is printed afterwards.
    """
    import DataDefinitions

    with open('data/definitions.json', 'r') as definitions_file:
        raw_content = definitions_file.read()
        definitions_json = raw_content.decode('utf-8')
        # The import runs while the file is still open, as before.
        DataDefinitions.import_definitions_into_db(definitions_json, True)

    print('done')
Beispiel #2
0
def test_generate_paths_apriori_mp():
    """Rebuild the paths via ``Path.test_generate_paths_with_generator``.

    The run is tracked under the process name ``generate_paths_apriori``:
    it is marked as started first, as done after a successful run, and as
    failed (with the formatted traceback) when anything inside the guarded
    section raises. The exception is re-raised after being recorded.
    """
    import DataDefinitions

    process_name = 'generate_paths_apriori'
    ProcessTracking.process_start(process_name)
    run_on_temp_data()
    try:
        # Start from an empty path collection.
        Path.drop()
        Path.db_setup()

        limits = Settings.load_dict([
            'path_min_support', 'path_min_confidence', 'path_max_k',
            'path_min_students'
        ])

        # Second positional argument of the generator call.
        # NOTE(review): looks like a student query restricted to 'DTB' --
        # confirm against Path.test_generate_paths_with_generator.
        query = {'finished': True, 'ignore': False, 'stg': 'DTB'}
        Path.test_generate_paths_with_generator(
            DataDefinitions.get_elements(), query, True,
            limits['path_min_support'], limits['path_min_confidence'],
            limits['path_max_k'], limits['path_min_students'])
        ProcessTracking.process_done(process_name)
    except:
        # Record the failure for every kind of exception, then propagate it.
        failure_info = {'error': traceback.format_exc()}
        ProcessTracking.process_failed(process_name, failure_info)
        raise
Beispiel #3
0
def get_queries():
    """Return a dict mapping each query name to that query's ``get_dict()``.

    The queries are taken from ``DataDefinitions.get_queries()``.
    """
    return dict(
        (query_name, query_def.get_dict())
        for query_name, query_def in DataDefinitions.get_queries().iteritems())
Beispiel #4
0
def get_definitions():
    import DataDefinitions
    hashes = set()
    for path_el in DataDefinitions.get_elements():
        if path_el.md5_id() in hashes:
            print path_el.md5_id(), 'exists'
        else:
            print path_el.md5_id()

        hashes.add(path_el.md5_id())

        print path_el.get_dict()

    el_count = len(DataDefinitions.get_elements())
    hashes_count = len(hashes)
    # print el_count, math.factorial(el_count)/math.factorial(el_count-3)/math.factorial(2-1)
    print el_count, hashes_count, math.factorial(el_count) / (
        math.factorial(el_count - 3) * math.factorial(3))
Beispiel #5
0
def get_queries(settings):
    """Return the query definitions for the current user, keyed by md5 id.

    Each value is the query's ``get_dict()`` with the ``{cp_label}``
    placeholder replaced by ``settings['cp_label']``. The query whose ``q``
    is ``'stg'`` is left out when the user is restricted to exactly one
    allowed stg.

    :param settings: mapping that provides the ``cp_label`` entry.
    """
    allowed_stgs = UserTools.get_allowed_stgs(g.user)

    filters = {}
    for _name, query in DataDefinitions.get_queries().iteritems():
        # A stg filter is pointless if only a single stg may be selected.
        is_redundant_stg_filter = (allowed_stgs is not None
                                   and query.q == 'stg'
                                   and len(allowed_stgs) == 1)
        if not is_redundant_stg_filter:
            filters[query.md5_id()] = query.get_dict(
                replace_vars={'{cp_label}': settings['cp_label']})

    return filters
Beispiel #6
0
def generate_paths_apriori():
    """Drop and regenerate all paths from the current path elements.

    The run is tracked under the process name ``generate_paths_apriori``:
    started first, done after success, failed (with the formatted traceback)
    when the guarded section raises. The exception is re-raised.
    """
    import DataDefinitions

    process_name = 'generate_paths_apriori'
    ProcessTracking.process_start(process_name)
    run_on_temp_data()
    try:
        # Start from an empty path collection.
        Path.drop()
        Path.db_setup()
        Path.generate_paths_with_generator(DataDefinitions.get_elements())
        ProcessTracking.process_done(process_name)
    except:
        # Record the failure for every kind of exception, then propagate it.
        failure_info = {'error': traceback.format_exc()}
        ProcessTracking.process_failed(process_name, failure_info)
        raise
Beispiel #7
0
def generate_definitions():
    """
    Generate the definitions in the database, load them from the database
    and save them to the meta data.

    The run is tracked as process ``generate_definitions`` (start and done
    only; a failing step propagates without being recorded).
    """
    import DataDefinitions

    process_name = 'generate_definitions'
    ProcessTracking.process_start(process_name)

    DataDefinitions.generate_definitions_in_db()
    DataDefinitions.load_definitions_from_db()
    DataDefinitions.save_definitions_to_meta_data()

    ProcessTracking.process_done(process_name)
Beispiel #8
0
def test_definitions_db():
    import DataDefinitions
    run_on_temp_data()
    DataDefinitions.load_definitions_from_db()
    print 'loaded'
    DataDefinitions.save_definitions_to_meta_data()
    print 'saved'
    md = MetaData.find_by_id('definitionsDate')
    print type(md.data), md.data

    DataDefinitions.load_definitions_from_meta_data()
    print 'loaded'
Beispiel #9
0
def permutations():
    import DataDefinitions
    elements = DataDefinitions.get_elements()[0:91]

    k = 7
    # elements = elements
    comb_count = math.factorial(len(elements)) / \
                 (math.factorial(len(elements) - k) * math.factorial(k))

    index = 0L
    start = time.time()
    for els in itertools.combinations(elements, k):
        if index % 100000 == 0:
            print index, '/', comb_count, 'time:', time.time() - start

        index += 1

    print index, '/', comb_count, 'time:', time.time() - start
Beispiel #10
0
def check_path_counts():
    import DataDefinitions
    run_on_temp_data()
    all_paths = Path.find({})
    elements = [
        pe for pe in DataDefinitions.get_elements()
        if not pe.query.ignore and pe.condition.name != 'success'
    ]
    ba = Student.get_students_bitarray(elements, {
        'ignore': False,
        'finished': True
    })

    for path in all_paths:
        db_query = {'ignore': False, 'finished': True}
        all_pe = []
        for pe in path.filter_elements:
            pe.get_db_query(db_query)
            all_pe.append(pe)

        db_query_count = db_query.copy()

        path_count_ba = ba.count_matching(path.filter_elements)
        path_count = Student.find(db_query_count).count()
        for pe in path.elements:
            pe.get_db_query(db_query)
            all_pe.append(pe)

        path_matched_ba = ba.count_matching(all_pe)
        path_matched = Student.find(db_query).count()

        if path_count != path.count or path_matched != path.matched:
            print 'path ba:', path_matched_ba, '/', path_count_ba, ' db:', path_matched, '/', path_count, '!=', path.get_str(
            )
            print '\t count', db_query_count
            print '\t matched', db_query
Beispiel #11
0
def find_path_elements():
    """Print hash and dict form of every element found for Query('success')."""
    import DataDefinitions

    success_elements = DataDefinitions.get_elements_by_query(Query('success'))
    for element in success_elements:
        print(hash(element))
        print(element.get_dict())
Beispiel #12
0
def export_definitions():
    """Write the definitions exported from the DB to ``data/definitions.json``.

    The exported text is encoded as UTF-8 before writing; ``done`` is printed
    afterwards.
    """
    import DataDefinitions

    exported = DataDefinitions.export_definitions_from_db()
    with open('data/definitions.json', 'w') as definitions_file:
        encoded = exported.encode('utf-8')
        definitions_file.write(encoded)

    print('done')
Beispiel #13
0
def save_defintions_to_db():
    """Call ``DataDefinitions.save_defintions_to_db()`` and print ``Done``."""
    import DataDefinitions

    DataDefinitions.save_defintions_to_db()

    print('Done')
Beispiel #14
0
def is_field_allowed(field, user_role, query_types):
    """Return True if ``field`` is a known field the role may access.

    A field is known when it is a key of ``DataDefinitions.get_queries()``
    or of ``query_types``. Access is decided by
    ``DB.Student.is_field_allowed``.

    :param field: field name to check.
    :param user_role: role passed on to ``DB.Student.is_field_allowed``.
    :param query_types: container of additionally accepted field names.
    :return: ``True`` or ``False``.
    """
    is_known = field in DataDefinitions.get_queries() or field in query_types
    # The access check is only consulted for known fields.
    return bool(is_known and DB.Student.is_field_allowed(field, user_role))
Beispiel #15
0
# Aggregation operators a client may request through ``calculations``.
_ALLOWED_CALCULATION_OPS = ('sum', 'avg', 'max', 'min', 'addToSet')


def _may_read_aggregates(user_role):
    """Return a true value if the role has 'students_data' or 'student_analytics'."""
    return (UserTools.has_right('students_data', user_role)
            or UserTools.has_right('student_analytics', user_role))


def _parse_grouping(raw_groups, raw_calculations, user_role, query_types):
    """Validate the group fields and calculations of a grouping request.

    :param raw_groups: comma separated field names to group by.
    :param raw_calculations: comma separated ``<op>.<field>`` entries or None.
    :param user_role: role used for the field access checks.
    :param query_types: extra field names accepted by ``is_field_allowed``.
    :return: ``(allowed_groups, allowed_calculations, error)``. ``error`` is
        ``None`` on success, otherwise the payload dict of a 400 response
        (the two lists are ``None`` in that case).
    """
    if not isinstance(raw_groups, unicode):
        return None, None, {'error': 'invalid_groups'}

    allowed_groups = []
    for name in raw_groups.split(','):
        # One unknown or forbidden group field rejects the whole request.
        if not is_field_allowed(name, user_role, query_types):
            return None, None, {'error': 'invalid_group', 'name': name}
        allowed_groups.append(name)

    allowed_calculations = []
    if isinstance(raw_calculations, unicode):
        for full_name in raw_calculations.split(','):
            op, separator, name = full_name.partition('.')
            if not separator:
                # Entry without '<op>.' prefix: skip it like any other
                # invalid calculation instead of failing on unpacking.
                continue
            if not is_field_allowed(name, user_role, query_types) \
                    or op not in _ALLOWED_CALCULATION_OPS:
                continue
            allowed_calculations.append({'field': name, 'op': op})

    return allowed_groups, allowed_calculations, None


def handle():
    """Answer a student list / aggregation request.

    Reads these request arguments: ``limit``, ``start``, ``sort1``,
    ``sort2``, ``definitions``, ``mlist``, ``do_calc``, ``groups``,
    ``single_groups``, ``calculations``, ``output``, ``elements``, the risk
    light filters (``risk``, ``risk_all``, ``risk_stg``, ``risk_degree``),
    ``tags``, ``idents`` and any field named in ``query_types`` or in
    ``DataDefinitions.get_queries()``.

    Exactly one result mode is served, checked in this order: ``groups``,
    ``single_groups``, ``do_calc=sums``, ``do_calc=avgs``, ``output=csv``,
    and otherwise a paged student list.

    Invalid arguments produce a 400 response, missing rights a 403 response,
    and a database ``OperationFailure`` while listing a 500 response.
    """
    status = 200
    query_types = {
        'ident': 'int',
        'status': 'int',
        'risk.mean': 'float',
        'risk.avg': 'float',
        'risk.median': 'float',
        'risk.median_scaled': 'float',
        'risk_all.median_scaled': 'float',
        'risk_stg.median_scaled': 'float',
        'risk_degree.median_scaled': 'float',
        'risk.q25': 'float',
        'risk.q75': 'float'
    }

    # Sortable fields: the attributes of a fresh Student plus nested fields.
    # list() makes sure .extend() works on the result of keys().
    sortable = list(DB.Student().__dict__.keys())
    sortable.extend([
        '_id', 'risk.median', 'risk.median_scaled', 'risk_all.median_scaled',
        'risk_stg.median_scaled', 'risk_degree.median_scaled', 'risk.mean',
        'risk.q25', 'risk.q75', 'semester_data.sem_1.bonus_total',
        'semester_data.sem_1.grade', 'semester_data.sem_1.delayed',
        'semester_data.sem_1.failed', 'semester_data.sem_1.successful',
        'semester_data.sem_1.count_KL', 'semester_data.sem_2.bonus_total',
        'semester_data.sem_2.grade', 'semester_data.sem_2.delayed',
        'semester_data.sem_2.failed', 'semester_data.sem_2.successful',
        'semester_data.sem_2.count_KL', 'semester_data.sem_3.bonus_total',
        'semester_data.sem_3.grade', 'semester_data.sem_3.delayed',
        'semester_data.sem_3.failed', 'semester_data.sem_3.successful',
        'semester_data.sem_3.count_KL'
    ])

    limit = request.args.get('limit', default=20, type=int)
    start = request.args.get('start', default=0, type=int)
    sort1 = request.args.get('sort1', default='_id,-1').split(',')
    sort2 = request.args.get('sort2', default='').split(',')
    with_definitions = request.args.get('definitions', default='false')
    mlist = request.args.get('mlist', default=None)
    do_calc = request.args.get('do_calc', default=None)
    groups = request.args.get('groups', default=None)
    single_groups = request.args.get('single_groups', default=None)
    calculations = request.args.get('calculations', default=None)
    is_csv = request.args.get('output', default='json') == 'csv'

    user_role = g.user_role

    ret = {
        'start': start,
        'limit': limit,
        'count': 0,
        'list': None,
        'query': None,
        'sort': None
    }
    if with_definitions == 'true':
        ret['definitions'] = get_definitions()

    db_query = dict()
    db_queries = []  # for restrictions
    db_sort = []
    for sort_arg in (sort1, sort2):
        # Expected form is '<field>,<direction>'; anything else is ignored.
        if len(sort_arg) == 2 and sort_arg[0] in sortable:
            try:
                direction = int(sort_arg[1])
            except ValueError:
                # A non-numeric direction is a client error, not a crash.
                return respond({
                    'error': 'invalid_sort',
                    'name': sort_arg[0]
                }, 400)
            db_sort.append((sort_arg[0], direction))

    settings = DB.Settings.load_dict([
        'lights', 'main_risk_group', 'hide_finished_ident_data',
        'hide_finished_after_days', 'student_ident_string'
    ])

    # filter by MarkedList
    if mlist is not None:
        ml = DB.MarkedList.find_one({'_id': mlist})
        if ml is not None and ml.is_allowed(g.username, user_role):
            student_ids = list(ml.list)
            if not settings['student_ident_string']:
                student_ids = [int(x) for x in student_ids]
            db_query['_id'] = {'$in': student_ids}
            ret['mlist'] = ml.get_dict()
            ret['mlist']['is_writable'] = ml.is_writable(g.username, user_role)
        else:
            return respond({'error': 'invalid_mlist'}, 400)

    filter_elements = request.args.get('elements', default=None)
    if filter_elements is not None:
        for fe_id in filter_elements.split(','):
            try:
                fe_hash = long(fe_id)
            except ValueError:
                # Non-numeric element ids used to escape as a server error.
                return respond({
                    'error': 'invalid_filter',
                    'name': 'elements'
                }, 400)
            fe = DataDefinitions.get_element_by_hash(fe_hash)
            if fe is not None:
                # apply condition from filter element to db_query
                fe.get_db_query(db_query)

    if settings['student_ident_string']:
        query_types['ident'] = 'str'

    # Risk light filters: green is below lights[1], yellow is from lights[1]
    # up to (excluding) lights[2], red is lights[2] and above.
    for name in ['risk', 'risk_all', 'risk_stg', 'risk_degree']:
        if name in request.args:
            if request.args[name] == 'green':
                db_query[name + '.median_scaled'] = {
                    '$lt': settings['lights'][1]
                }
            if request.args[name] == 'yellow':
                db_query[name + '.median_scaled'] = {
                    '$gte': settings['lights'][1],
                    '$lt': settings['lights'][2]
                }
            if request.args[name] == 'red':
                db_query[name + '.median_scaled'] = {
                    '$gte': settings['lights'][2]
                }

    for name in request.args:
        if name in query_types:
            dbfield = name
            if name == 'ident':
                dbfield = '_id'
            try:
                value = request.args.get(name)
                db_query[dbfield] = DB.get_db_query_by_type(
                    value, query_types[name])
                continue

            except ValueError:
                return respond({'error': 'invalid_filter', 'name': name}, 400)

        if name == 'tags':
            try:
                value = request.args.get(name).split(',')
                db_query[name] = {'$all': value}
                continue

            except ValueError:
                return respond({'error': 'invalid_filter', 'name': name}, 400)

        if name == 'idents':
            try:
                if settings['student_ident_string']:
                    value = request.args.get(name).split(',')
                    db_query['_id'] = {'$in': value}
                else:
                    db_query['_id'] = DB.get_db_query_by_type(
                        request.args.get(name), 'in_intlist')
                continue

            except ValueError:
                return respond({'error': 'invalid_filter', 'name': name}, 400)

        # Everything else is only accepted if it is a defined query.
        if name not in DataDefinitions.get_queries():
            continue

        query = DataDefinitions.get_queries()[name]

        try:
            value = request.args.get(name)
            db_query[name] = get_db_query(value, query)

        except ValueError:
            return respond({'error': 'invalid_filter', 'name': name}, 400)

    allowed_stgs = UserTools.get_allowed_stgs(g.user)
    if allowed_stgs:
        db_queries.append({'stg': {'$in': allowed_stgs}})

    if settings['hide_finished_after_days'] != -1:
        # Finished students are only kept while their exm_date is newer than
        # the configured number of days; -1 disables this restriction.
        earliest = datetime.utcfromtimestamp(
            time.time() - float(settings['hide_finished_after_days']) * 86400)
        db_queries.append({
            '$or': [{
                'finished': True,
                'exm_date': {
                    '$gt': earliest
                }
            }, {
                'finished': False
            }]
        })

    if len(db_queries) > 0:
        # Combine the restrictions with the user supplied filters.
        db_queries.append(db_query)
        db_query = {'$and': db_queries}

    if groups is not None:
        if not _may_read_aggregates(user_role):
            return respond({'error': 'no_rights'}, 403)

        allowed_groups, allowed_calculations, error = _parse_grouping(
            groups, calculations, user_role, query_types)
        if error is not None:
            return respond(error, 400)

        ret['groups'] = allowed_groups
        ret['calculations'] = allowed_calculations
        ret['group_results'] = DB.Student.calc_groups(allowed_groups, db_query,
                                                      allowed_calculations)

    elif single_groups is not None:
        if not _may_read_aggregates(user_role):
            return respond({'error': 'no_rights'}, 403)

        allowed_groups, allowed_calculations, error = _parse_grouping(
            single_groups, calculations, user_role, query_types)
        if error is not None:
            return respond(error, 400)

        ret['single_groups'] = allowed_groups
        ret['calculations'] = allowed_calculations
        ret['group_results'] = DB.Student.calc_single_groups(
            allowed_groups, db_query, allowed_calculations)

    elif do_calc == 'sums':
        if not _may_read_aggregates(user_role):
            return respond({'error': 'no_rights'}, 403)

        ret['sums'] = DB.Student.calc_sums(db_query)

    elif do_calc == 'avgs':
        if not _may_read_aggregates(user_role):
            return respond({'error': 'no_rights'}, 403)

        ret['avgs'] = None
        risk_values_allowed_key = 'risk_value_' + user_role
        # Separate name so the settings loaded above are not replaced.
        risk_settings = DB.Settings.load_dict([risk_values_allowed_key])
        avgs = DB.Student.calc_avgs(db_query)
        if avgs:
            ret['avgs'] = {}
            for key, value in avgs.iteritems():
                # Risk averages are dropped when the role's setting is false.
                if 'risk' in key and not risk_settings.get(
                        risk_values_allowed_key, True):
                    continue
                if key in DB.Student.restricted_fields:
                    if UserTools.has_right(DB.Student.restricted_fields[key],
                                           user_role):
                        ret['avgs'][key] = value
                else:
                    ret['avgs'][key] = value

    elif is_csv:
        if not UserTools.has_right('students_data', user_role):
            return respond({'error': 'no_rights'}, 403)

        # CSV export is not paged: limit and start are not applied.
        cursor = DB.Student.find(db_query, sort=db_sort)
        return respond_csv(cursor, ret)

    else:
        if not UserTools.has_right('students_data', user_role):
            return respond({'error': 'no_rights'}, 403)

        if not 1 <= limit <= 1000:
            return respond({'error': 'invalid_limit'}, 400)

        try:
            cursor = DB.Student.find(db_query,
                                     limit=limit,
                                     skip=start,
                                     sort=db_sort)
            ret['count'] = cursor.count()
            ret['list'] = [
                s.get_dict(user_role,
                           hide_finished_ident_data=settings[
                               'hide_finished_ident_data']) for s in cursor
            ]
        except DB.errors.OperationFailure as e:
            ret['count'] = 0
            ret['error'] = e.message
            status = 500

    ret['query'] = repr(db_query)
    ret['sort'] = repr(db_sort)

    return respond(ret, status)
Beispiel #16
0
def get_definitions():
    """Build the definitions payload for the current user.

    The returned dict contains the path elements visible to the user, the
    restricted fields the user's role has no right for, the query
    definitions, several settings values, the tags sorted by ``order``, the
    ``lastDate`` meta data date, the known user roles and the value of
    ``g.students_view``.
    """
    user_role = g.user_role

    # Fields whose required right the current role does not have.
    restricted_fields = [
        field
        for field, role in DB.Student.restricted_fields.iteritems()
        if not UserTools.has_right(role, user_role)
    ]
    data = {'path_elements': {}, 'restricted': restricted_fields}

    data['list_identification_data'] = UserTools.has_right(
        'list_identification_data', user_role)

    allowed_stgs = UserTools.get_allowed_stgs(g.user)

    visible_elements = data['path_elements']
    for pe in DataDefinitions.get_elements():
        if allowed_stgs is not None:
            query_name = pe.query.q
            if query_name == 'stg_original':
                mapped = DB.Course.get_mapped_short(pe.condition.compare_value)
                if mapped not in allowed_stgs:
                    continue
            elif query_name == 'stg':
                # Also hidden when only one stg is allowed at all.
                if pe.condition.compare_value not in allowed_stgs \
                        or len(allowed_stgs) == 1:
                    continue

        visible_elements[pe.md5_id()] = pe.get_dict(query_id=True)

    last_date = DB.MetaData.find_by_id('lastDate')
    if last_date is None:
        data['lastDate'] = None
    else:
        data['lastDate'] = last_date.data['date']

    data['user_roles'] = UserTools.user_roles.keys()

    risk_values_allowed_key = 'risk_value_' + user_role
    settings = DB.Settings.load_dict([
        risk_values_allowed_key, 'generate_risk_group_all',
        'generate_risk_group_stg', 'generate_risk_group_degree',
        'main_risk_group', 'compare_averages', 'cp_label', 'hide_resigned',
        'hide_median_risk', 'hide_student_fields', 'hide_applicant_fields'
    ])

    data['queries'] = get_queries(settings)

    data['lights'] = DB.Settings.load_dict_for_key('lights')

    # Settings that are passed through under their own name.
    for key in ('generate_risk_group_all', 'generate_risk_group_stg',
                'generate_risk_group_degree', 'main_risk_group'):
        data[key] = settings[key]
    data['risk_value_allowed'] = settings.get(risk_values_allowed_key, True)
    for key in ('compare_averages', 'hide_resigned', 'hide_median_risk',
                'hide_student_fields', 'hide_applicant_fields'):
        data[key] = settings[key]

    data['tags'] = [
        item.get_dict() for item in DB.Tag.find({}, sort=[['order', 1]])
    ]

    data['students_view'] = g.students_view

    return data