Ejemplo n.º 1
0
def load_data():
    """Load the feature matrix and labels, using on-disk caches when present.

    The current working directory is checked in this order:

    1. ``data.npz`` -- the final cached result; returned exactly as loaded by
       ``np.load`` with no further processing.
    2. ``cleared.npz`` -- cached ``X``/``Y`` arrays saved after the
       ``VarianceThreshold`` step.
    3. ``data.json`` -- raw input with ``features`` and ``grades`` keys; ``X``
       is passed through ``VarianceThreshold().fit_transform`` and both
       arrays are saved to ``cleared.npz``.

    For cases 2 and 3 the grades are converted with ``grades_to_labels``, the
    number of samples per label (1, 0, -1) is logged, ``feature_selection``
    and ``filtering`` are applied, and the result is written to ``data.npz``.

    Returns:
        A mapping with keys ``'X'`` and ``'Y'``: the ``np.load`` result when
        ``data.npz`` exists, otherwise a plain dict.
    """
    # List the directory once so both cache checks see the same snapshot.
    files = os.listdir('.')
    if 'data.npz' in files:
        logger.info('loading saved features')
        return np.load('data.npz')

    if 'cleared.npz' in files:
        logger.info('use cleared features')
        # Indexing reads the arrays into memory, so the archive can be
        # closed as soon as both have been extracted.
        with np.load('cleared.npz') as data:
            X = data['X']
            Y = data['Y']
    else:
        logger.info('loading json')
        with open('data.json', 'r') as f:
            data = json_wrapper.loads(f.read())
        logger.info('json loaded')
        X = np.array(data['features'], np.float32)
        # clear features
        logger.info('clear features')
        X = VarianceThreshold().fit_transform(X)
        Y = np.array(data['grades'], np.float32)
        np.savez('cleared', X=X, Y=Y)
        logger.info('clear features done')

    Y = grades_to_labels(Y)
    for title, label in (('good', 1), ('normal', 0), ('poor', -1)):
        count = sum(1 for y in Y if y == label)
        logger.info('num of ' + title + ' students: ' + str(count))

    logger.info('feature selection')
    X = feature_selection(X, Y)
    X, Y = filtering(X, Y)
    logger.info('writing npz data')
    result = {'X': X, 'Y': Y}
    np.savez('data', **result)
    return result
Ejemplo n.º 2
0
def process(file_name, conn, term):
    """Insert the named problems of a course-structure JSON file into ``all_courses_problems``.

    Args:
        file_name: Path of a JSON file mapping module names to dicts that
            carry ``category`` and ``metadata`` keys.
        conn: DB-API connection; one row is inserted and committed for every
            module whose category is ``problem`` and whose metadata has a
            ``display_name``.
        term: String whose first two ``-``-separated parts are stored as the
            course id and the term id.

    Raises:
        Exception: whatever is raised while handling a problem is re-raised
            after the offending module name has been printed.
    """
    # Read everything up front so the file handle is released immediately.
    with open(file_name) as f:
        obj = json_wrapper.loads(f.read())

    for name in obj:
        module = obj[name]
        if module['category'] != "problem" or 'display_name' not in module['metadata']:
            continue
        try:
            # Two module-name layouts are handled: '...+type@<xml_id>' and
            # '.../<xml_id>'.
            if '+' in name:
                xml_id = name.split('+')[-1].split('@')[-1]
            else:
                xml_id = name.split('/')[-1]
            course_id = term.split('-')[0]
            term_id = term.split('-')[1]
            display_name = module['metadata']['display_name']
            # The first two words of the display name are stored as the type.
            problem_type = ' '.join(display_name.split(' ')[0:2])
            c = conn.cursor()
            c.execute(
                "INSERT INTO all_courses_problems VALUES(%s, %s, %s, %s, %s);",
                [course_id, term_id, xml_id, display_name, problem_type])
            conn.commit()
        except Exception:
            print('name = ' + name)
            raise
def register_task():
    """Register a task described by the ``desc`` request value.

    ``desc`` is read from ``request.values``, parsed as JSON and handed to
    ``__task_center.register_task``.

    request:
        {
            "desc": {
                    "input_data_type_id": 121, "time-range": "1m",
                    "functions": {
                        "1": {"name": "filter", "target": "status",
                            "conditions": [{"target": "cost", "operator": "bt", "param1": 100, "param2": 1000}, {}]},
                        "2": {"name": "average", "target": "cost", "tag": "avg_cost", "group_by": ["function", "status"]}
                    }
            }
        }

    Returns:
        A ``jsonify`` response: ``{'result': 'successful', 'task_id': ...}``
        on success, otherwise ``{'result': 'fail', 'info': ...}`` where
        ``info`` is ``'broken json'`` if ``desc`` could not be parsed.
    """
    print(request.values.get('desc'))
    try:
        task_info = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        # Report parse failures to the client, but do not swallow
        # SystemExit / KeyboardInterrupt the way a bare ``except:`` would.
        response = jsonify({'result': 'fail', 'info': 'broken json'})
        return response

    print(str(task_info))
    reg_result = __task_center.register_task(task_info)
    if reg_result['succ']:
        response = jsonify({'result': 'successful', 'task_id': reg_result['task_id']})
    else:
        response = jsonify({'result': 'fail', 'info': reg_result['info']})
    return response
Ejemplo n.º 4
0
def register_data_type():
    """Register a data type described by the ``desc`` request value.

    request:
        {"desc": {"title": "Title", "attrs": {"userid": "string", "function": "string", "status": "integer", "cost": "real"}}}

    Returns:
        A ``jsonify`` response:

        * ``{'result': 'successful', 'data_type_id': ...}`` on success;
        * ``{'result': 'fail', 'info': 'broken json'}`` if ``desc`` cannot be
          parsed;
        * ``{'result': 'fail', 'info': 'type not accept: <attr>'}`` for a
          ``type_error`` result;
        * ``{'result': 'fail', 'info': 'database error'}`` for a ``db_error``
          result;
        * ``{'result': 'fail', 'info': <info>}`` for any other failure.
    """
    try:
        data_type = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        response = jsonify({'result': 'fail', 'info': 'broken json'})
        return response

    result = __data_center.register_data_type(data_type)
    # deal response due to the result
    if result['succ']:
        response = jsonify({
            'result': 'successful',
            'data_type_id': result['id']
        })
    elif result['info'] == 'type_error':
        # This must be part of one if/elif chain: as two separate ``if``
        # statements the generic ``else`` below overwrote this response.
        response = jsonify({
            'result': 'fail',
            'info': 'type not accept: ' + str(result['attr'])
        })
    elif result['info'] == 'db_error':
        response = jsonify({'result': 'fail', 'info': 'database error'})
    else:
        response = jsonify({'result': 'fail', 'info': result['info']})
    return response
Ejemplo n.º 5
0
def register_task():
    """Register a task described by the ``desc`` request value.

    ``desc`` is read from ``request.values``, parsed as JSON and handed to
    ``__task_center.register_task``.

    request:
        {
            "desc": {
                    "input_data_type_id": 121, "time-range": "1m",
                    "functions": {
                        "1": {"name": "filter", "target": "status",
                            "conditions": [{"target": "cost", "operator": "bt", "param1": 100, "param2": 1000}, {}]},
                        "2": {"name": "average", "target": "cost", "tag": "avg_cost", "group_by": ["function", "status"]}
                    }
            }
        }

    Returns:
        A ``jsonify`` response: ``{'result': 'successful', 'task_id': ...}``
        on success, otherwise ``{'result': 'fail', 'info': ...}`` where
        ``info`` is ``'broken json'`` if ``desc`` could not be parsed.
    """
    print(request.values.get('desc'))
    try:
        task_info = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        # Report parse failures to the client, but do not swallow
        # SystemExit / KeyboardInterrupt the way a bare ``except:`` would.
        response = jsonify({'result': 'fail', 'info': 'broken json'})
        return response

    print(str(task_info))
    reg_result = __task_center.register_task(task_info)
    if reg_result['succ']:
        response = jsonify({
            'result': 'successful',
            'task_id': reg_result['task_id']
        })
    else:
        response = jsonify({'result': 'fail', 'info': reg_result['info']})
    return response
Ejemplo n.º 6
0
def query_data(data_type_id, start_time, end_time):
    """Return the data records of one type recorded within a time window.

    Args:
        data_type_id: Value matched against ``DataInfo.data_type_id``.
        start_time: Inclusive lower bound on ``DataInfo.time``.
        end_time: Inclusive upper bound on ``DataInfo.time``.

    Returns:
        A list of dicts with keys ``id``, ``attrs`` (the stored attrs parsed
        with ``json_wrapper.loads``) and ``time``.
    """
    session = Session()
    try:
        rows = session.query(DataInfo.id, DataInfo.attrs, DataInfo.time)\
            .filter(DataInfo.time >= start_time, DataInfo.time <= end_time, DataInfo.data_type_id == data_type_id)
        return [
            {'id': info[0], 'attrs': json_wrapper.loads(info[1]), 'time': info[2]}
            for info in rows
        ]
    finally:
        # Release the session even when the query or JSON parsing fails.
        session.close()
Ejemplo n.º 7
0
def get_attempts(json):
    """Return the ``attempts`` value stored in a JSON state string.

    Each doubled backslash in the input is collapsed into a single one
    before parsing.

    Args:
        json: JSON text to parse.

    Returns:
        The value under the ``attempts`` key, or ``0`` when the key is absent.

    Raises:
        Exception: any parsing error is re-raised after the offending text
            has been printed.
    """
    json = json.replace('\\\\', '\\')
    try:
        temp = json_wrapper.loads(json)
    except Exception:
        # print() with a single argument works on both Python 2 and 3,
        # unlike the former ``print json`` statement.
        print(json)
        raise
    if 'attempts' in temp:
        return temp['attempts']
    return 0
Ejemplo n.º 8
0
def query_task_by_id(task_id):
    """Look up a task by its id.

    Args:
        task_id: Value matched against ``TaskInfo.id``.

    Returns:
        A dict with keys ``id``, ``input_data_type_id``, ``time_range``,
        ``functions`` (parsed with ``json_wrapper.loads``),
        ``result_data_type_id`` and ``title``, or ``None`` when no task
        matches. If several rows match, the last one is returned.
    """
    session = Session()
    # Start from None so a missing task no longer raises UnboundLocalError.
    result = None
    try:
        rows = session.query(TaskInfo.id, TaskInfo.input_data_type_id, TaskInfo.time_range, TaskInfo.functions,
                             TaskInfo.result_data_type_id, TaskInfo.title).filter(TaskInfo.id == task_id)
        for info in rows:
            result = {'id': int(info[0]), 'input_data_type_id': int(info[1]),
                      'time_range': json_wrapper.encode(info[2], 'utf-8'),
                      'functions': json_wrapper.loads(info[3]), 'result_data_type_id': int(info[4]),
                      'title': json_wrapper.encode(info[5], 'utf-8')}
    finally:
        # Release the session even when the query or a conversion fails.
        session.close()
    return result
Ejemplo n.º 9
0
def query_data(data_type_id, start_time, end_time):
    """Return the data records of one type recorded within a time window.

    Args:
        data_type_id: Value matched against ``DataInfo.data_type_id``.
        start_time: Inclusive lower bound on ``DataInfo.time``.
        end_time: Inclusive upper bound on ``DataInfo.time``.

    Returns:
        A list of dicts with keys ``id``, ``attrs`` (the stored attrs parsed
        with ``json_wrapper.loads``) and ``time``.
    """
    session = Session()
    try:
        rows = session.query(DataInfo.id, DataInfo.attrs, DataInfo.time)\
            .filter(DataInfo.time >= start_time, DataInfo.time <= end_time, DataInfo.data_type_id == data_type_id)
        return [
            {
                'id': info[0],
                'attrs': json_wrapper.loads(info[1]),
                'time': info[2]
            }
            for info in rows
        ]
    finally:
        # Release the session even when the query or JSON parsing fails.
        session.close()
Ejemplo n.º 10
0
def process(file_name, term):
    """Load a line-delimited JSON clickstream log into ``<term>_clickstream_events``.

    Args:
        file_name: Path of a log file holding one JSON object per line.
        term: Term identifier; ``.`` and ``-`` are replaced by ``_`` to form
            the table-name prefix.

    Each event contributes one row: user id, username, session, event type,
    name, event source, time and referer. Optional fields that are absent
    are stored as ``None``.
    """
    table = term.replace('.', '_').replace('-', '_') + '_clickstream_events'
    # The context manager closes the log even if a line fails to parse or
    # an insert raises.
    with open(file_name) as f:
        create_table(table)
        for line in f:
            obj = json_wrapper.loads(line)
            context = obj['context']
            text = [
                context['user_id'] if 'user_id' in context else None,
                obj['username'], obj.get('session'),
                obj['event_type'], obj.get('name'),
                obj['event_source'], obj['time'],
                obj.get('referer')
            ]
            insert_table(text, table)
Ejemplo n.º 11
0
def process(file_name, conn, term, module_id, split_type, split_id):
    """Flatten a course-structure JSON file into rows of ``<term>_element``.

    Args:
        file_name: Path of a JSON file mapping element keys to dicts with
            ``metadata`` and ``children`` entries.
        conn: Database connection passed to ``create_table``/``insert_table``.
        term: Term identifier; ``.`` and ``-`` are replaced by ``_`` to form
            the table-name prefix.
        module_id: Iterable of ``<name>@<chapter number>`` strings naming the
            chapters to export.
        split_type: Callable returning the element type for a key.
        split_id: Callable returning the element id for a key.

    For every requested chapter the tree chapter -> sequential -> vertical ->
    element is walked. An element that has children of its own contributes
    one row per child; otherwise it contributes a row for itself.
    """
    table = term.replace('.', '_').replace('-', '_') + '_element'
    # Only hold the file open while it is being read; the context manager
    # also closes it if table creation or parsing fails.
    with open(file_name) as f:
        create_table(conn, table)
        obj = json_wrapper.loads(f.read())

    # Discussions are referred to by their metadata discussion_id, not by
    # the id embedded in their key.
    discussion_dict = {}
    for key in obj:
        if split_type(key) == 'discussion' and 'discussion_id' in obj[key]['metadata']:
            discussion_dict[split_id(key)] = obj[key]['metadata']['discussion_id']

    # Map '<type>@<id>' of every non-course element to its children.
    element = {}
    for key in obj:
        if split_type(key) == 'course':
            continue
        child = []
        for children in obj[key]['children']:
            if split_type(children) == 'discussion':
                child.append(split_type(children) + '@' + discussion_dict[split_id(children)])
            else:
                child.append(split_type(children) + '@' + split_id(children))
        element[split_type(key) + '@' + split_id(key)] = child

    for mid in module_id:
        name = mid.split('@')[0]
        num = mid.split('@')[-1]
        for sqt in element['chapter@' + num]:
            for vert in element[sqt]:
                for relment in element[vert]:
                    lib_problem = element[relment]
                    if len(lib_problem) > 0:
                        for lproblem in lib_problem:
                            eid = lproblem.split('@')[-1]
                            etype = lproblem.split('@')[0]
                            print(etype + ' ' + eid)
                            insert_table(conn, [num, name, etype, eid], table)
                    else:
                        eid = relment.split('@')[-1]
                        etype = relment.split('@')[0]
                        print(etype + '-------' + eid)
                        insert_table(conn, [num, name, etype, eid], table)
Ejemplo n.º 12
0
def insert_comment_info_table(conn, file):
    """Insert the course comments of a JSON export into ``coursera_comment_info``.

    Args:
        conn: DB-API connection; one row is inserted and committed per
            element.
        file: Path of a JSON file with an ``elements`` list. The
            ``completed`` column is 1 when the path contains the substring
            ``completed`` and 0 otherwise.
    """
    with open(file) as f:
        obj = json_wrapper.loads(f.read())

    # The flag depends only on the file name, so compute it once.
    completed = 1 if 'completed' in file else 0
    for element in obj['elements']:
        course_id = element['context']['definition']['courseId']
        user_id = element['userId']
        comment = clean_comment(
            element['comments']['generic']['definition']['value'])
        rating = element['rating']['value']
        timestamp = element['timestamp']
        c = conn.cursor()
        c.execute(
            "INSERT INTO coursera_comment_info VALUES(%s, %s, %s, %s, %s, %s);",
            [course_id, user_id, comment, rating, timestamp, completed])
        conn.commit()
Ejemplo n.º 13
0
 def __init__(self, path=None):
     """Create an empty structure, or restore one saved at ``path``.

     Args:
         path: Optional path of a file whose first line is a JSON object
             with the keys ``learn_rate``, ``default_weight``,
             ``input_nodes``, ``output_nodes``, ``relations``, ``weights``
             and ``thetas``. When ``None``, the instance starts with no
             nodes, relations, weights or thetas, a learn rate of 0.01 and
             a default weight of 0.5.
     """
     if path is None:
         self.__learn_rate = 0.01
         self.__default_weight = 0.5
         self.__input_nodes = []
         self.__relations = {}
         self.__output_nodes = []
         self.__weights = {}
         self.__thetas = {}
     else:
         # The context manager closes the file even if parsing fails, and
         # the handle no longer shadows the ``file`` builtin.
         with open(path, 'r') as handle:
             structure = json_wrapper.loads(handle.readline(), 'utf-8')
         self.__learn_rate = structure['learn_rate']
         self.__default_weight = structure['default_weight']
         self.__input_nodes = structure['input_nodes']
         self.__output_nodes = structure['output_nodes']
         self.__relations = structure['relations']
         self.__weights = structure['weights']
         self.__thetas = structure['thetas']
Ejemplo n.º 14
0
def put_data():
    """Store one data record described by the ``data`` request value.

    request:
        {"data": {"data_type_id": 5243212,
                    "attrs": {"userid":123, "function": "GET/userinfo", "status": 200, "cost": 12.59},
                    "time" : "2015-09-01 10:50:39"
                }
        }

    Returns:
        A ``jsonify`` response: ``{'result': 'successful'}`` on success,
        otherwise ``{'result': 'fail', 'info': ...}`` where ``info`` is
        ``'broken json'`` if ``data`` could not be parsed.
    """
    try:
        data = json_wrapper.loads(request.values.get('data'))
    except Exception:
        # Report parse failures to the client, but do not swallow
        # SystemExit / KeyboardInterrupt the way a bare ``except:`` would.
        response = jsonify({'result': 'fail', 'info': 'broken json'})
        return response

    result = __data_center.put_data(data)
    if result['succ']:
        response = jsonify({'result': 'successful'})
    else:
        response = jsonify({'result': 'fail', 'info': result['info']})
    return response
Ejemplo n.º 15
0
def put_data():
    """Store one data record described by the ``data`` request value.

    request:
        {"data": {"data_type_id": 5243212,
                    "attrs": {"userid":123, "function": "GET/userinfo", "status": 200, "cost": 12.59},
                    "time" : "2015-09-01 10:50:39"
                }
        }

    Returns:
        A ``jsonify`` response: ``{'result': 'successful'}`` on success,
        otherwise ``{'result': 'fail', 'info': ...}`` where ``info`` is
        ``'broken json'`` if ``data`` could not be parsed.
    """
    try:
        data = json_wrapper.loads(request.values.get('data'))
    except Exception:
        # Report parse failures to the client, but do not swallow
        # SystemExit / KeyboardInterrupt the way a bare ``except:`` would.
        response = jsonify({'result': 'fail', 'info': 'broken json'})
        return response

    result = __data_center.put_data(data)
    if result['succ']:
        response = jsonify({'result': 'successful'})
    else:
        response = jsonify({'result': 'fail', 'info': result['info']})
    return response
Ejemplo n.º 16
0
def process(file_name, conn, term):
    """Load a line-delimited JSON forum dump into the thread and comment tables.

    Args:
        file_name: Path of a dump holding one JSON object per line.
        conn: Database connection passed to the ``commentthread`` and
            ``comment`` table helpers.
        term: Term identifier; ``.`` and ``-`` are replaced by ``_`` to form
            the prefix of ``<term>_commentthread`` and ``<term>_comment``.

    Objects whose ``_type`` is ``CommentThread`` go to the thread table,
    those whose ``_type`` is ``Comment`` go to the comment table, and any
    other type is ignored.
    """
    term = term.replace('.', '_').replace('-', '_')
    thread_table = term + '_commentthread'
    comment_table = term + '_comment'
    # The context manager closes the dump even if a line fails to parse or
    # an insert raises.
    with open(file_name) as f:
        commentthread.create_table(conn, thread_table)
        comment.create_table(conn, comment_table)
        for line in f:
            obj = json_wrapper.loads(line)
            if obj['_type'] == "CommentThread":
                commentthread_text = [obj['_id']['$oid'], obj['votes']['up_count'], obj['votes']['down_count'],
                                      obj['votes']['count'], obj['votes']['point'], obj['thread_type'], obj['comment_count'],
                                      obj['title'], obj['body'], obj['updated_at']['$date'], obj['created_at']['$date'],
                                      obj['last_activity_at']['$date'], obj['commentable_id'].split('-')[-1], obj['author_id'], obj['author_username']]
                commentthread.insert_table(conn, commentthread_text, thread_table)
            elif obj['_type'] == "Comment":
                comment_text = [obj['comment_thread_id']['$oid'], obj['votes']['up_count'], obj['votes']['down_count'],
                                obj['votes']['count'], obj['votes']['point'],
                                obj['body'], obj['updated_at']['$date'], obj['created_at']['$date'], obj['author_id'], obj['author_username']]
                comment.insert_table(conn, comment_text, comment_table)
Ejemplo n.º 17
0
def insert_course_info_table(conn, file, table):
    """Insert course records from a JSON file into ``table``.

    Args:
        conn: DB-API connection; all rows are committed together at the end.
        file: Path of a JSON file containing a list of course dicts.
        table: Name of the destination table. It is concatenated into the
            SQL text, so it must be a trusted identifier.

    Each row holds ``course_id``, ``course_name``, ``instructor``, ``rating``
    and ``level``. Missing keys become ``None``; ``rating`` is stored as the
    float parsed from the text before the first space.

    Raises:
        Exception: an insert error is re-raised after the failing row has
            been printed.
    """
    c = conn.cursor()
    with open(file) as f:
        content = json_wrapper.loads(f.read())

    # Table names cannot be bound as query parameters; only the values are.
    statement = "INSERT INTO " + table + " VALUES(%s, %s, %s, %s, %s);"
    columns = ('course_id', 'course_name', 'instructor', 'rating', 'level')
    for item in content:
        p = []
        for column in columns:
            if column not in item:
                p.append(None)
            elif column == 'rating':
                p.append(float(item[column].split(' ')[0]))
            else:
                p.append(item[column])
        try:
            c.execute(statement, p)
        except Exception:
            print(p)
            raise
    conn.commit()
Ejemplo n.º 18
0
def register_data_type():
    """Register a data type described by the ``desc`` request value.

    request:
        {"desc": {"title": "Title", "attrs": {"userid": "string", "function": "string", "status": "integer", "cost": "real"}}}

    Returns:
        A ``jsonify`` response:

        * ``{'result': 'successful', 'data_type_id': ...}`` on success;
        * ``{'result': 'fail', 'info': 'broken json'}`` if ``desc`` cannot be
          parsed;
        * ``{'result': 'fail', 'info': 'type not accept: <attr>'}`` for a
          ``type_error`` result;
        * ``{'result': 'fail', 'info': 'database error'}`` for a ``db_error``
          result;
        * ``{'result': 'fail', 'info': <info>}`` for any other failure.
    """
    try:
        data_type = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        response = jsonify({'result': 'fail', 'info': 'broken json'})
        return response

    result = __data_center.register_data_type(data_type)
    # deal response due to the result
    if result['succ']:
        response = jsonify({'result': 'successful', 'data_type_id': result['id']})
    elif result['info'] == 'type_error':
        # This must be part of one if/elif chain: as two separate ``if``
        # statements the generic ``else`` below overwrote this response.
        response = jsonify({'result': 'fail', 'info': 'type not accept: ' + str(result['attr'])})
    elif result['info'] == 'db_error':
        response = jsonify({'result': 'fail', 'info': 'database error'})
    else:
        response = jsonify({'result': 'fail', 'info': result['info']})
    return response
Ejemplo n.º 19
0
     "SELECT module_id, student_id, grade, max_grade, course_id, state, created, modified, id FROM "
     + term + "_courseware_studentmodule" +
     " WHERE module_type = \"problem\" or module_type = \"openassessment\" and grade is not NULL;"
 )
 result = cursor.fetchall()
 create_grades_table(conn, term + "_students_grades")
 for row in result:
     student_id = row[1]
     grade = row[2]
     max_grade = row[3]
     created = row[6]
     modified = row[7]
     state = row[5]
     state = state.replace('\\\\', '\\')
     try:
         obj = json_wrapper.loads(state)
     except Exception as e:
         print state
         print row[8]
         print term
         raise e
     if 'attempts' in obj:
         attempt = obj['attempts']
     else:
         attempt = None
     if '+' in row[4]:
         course_id = row[4].split('+')[1]
         term_id = row[4].split('+')[-1]
     else:
         course_id = row[4].split('/')[1]
         term_id = row[4].split('/')[-1]
Ejemplo n.º 20
0
 conn = MySQLdb.connect(host="localhost",
                        user="******",
                        passwd="Mdb4Learn",
                        db="clickstream")
 for term in terms:
     table_name = ('HKUSTx-' + term + '-clickstream').replace('-',
                                                              '_').replace(
                                                                  '.', '_')
     file_name = dir + 'HKUSTx-' + term + '-clickstream.log'
     create_table(conn, table_name)
     for row in read_file.read(file_name):
         row['time'] = truncate(row['time'])
         if row['event_type'] == 'load_video':
             event = row['event']
             if isinstance(event, str):
                 event = json_wrapper.loads(event)
             if 'currentTime' not in event:
                 event['currentTime'] = None
             try:
                 insert_table(conn, [
                     'user_name', 'user_id', 'event_source', 'event_type',
                     'event_time', 'course_id', '`session`', 'video_id'
                 ], [
                     row['username'], row['context']['user_id'],
                     row['event_source'], row['event_type'], row['time'],
                     row['context']['course_id'], row['session'],
                     event['id']
                 ], table_name)
             except KeyError:
                 print("exception found in term " + term)
                 print(row['time'])
Ejemplo n.º 21
0
# Map each survey problem's xml_id to its survey type.
cursor.execute("SELECT xml_id, survey_type FROM Java_survey_questions;")
survey_map = {}
for question in cursor.fetchall():
    survey_map[question[0]] = question[1]

# For every term table, copy the answers given to survey problems into
# Java_survey_answers.
for table in table_prefix:
    cursor.execute("SELECT module_type, module_id, student_id, state FROM " + table + "_courseware_studentmodule;")
    modules = cursor.fetchall()
    term_id = table
    for module in modules:
        if module[0] != 'problem':
            continue
        xml_id = module[1].split('@')[-1]
        student_id = int(module[2])
        if xml_id not in survey_map:
            continue
        survey_type = survey_map[xml_id]
        raw_state = module[3]
        try:
            # Collapse doubled backslashes before parsing the stored state.
            state = json_wrapper.loads(raw_state.replace('\\\\', '\\'))
        except Exception:
            print(raw_state)
            raise
        if 'student_answers' not in state:
            continue
        answers = state['student_answers']
        for key in answers:
            # The second '_'-separated part of the key is the 1-based
            # question number; only the first 15 questions are kept.
            question_id = int(key.split('_')[1]) - 1
            if question_id >= 15:
                continue
            answer = int(answers[key].split('_')[1])
            cursor.execute("INSERT INTO Java_survey_answers VALUES(%s, %s, %s, %s, %s, %s)", [student_id, xml_id, survey_type, question_id, answer, term_id])
conn.commit()
conn.close()
Ejemplo n.º 22
0
def read(file_name):
    """Lazily yield one parsed JSON value per line of ``file_name``.

    The file is opened when iteration starts and closed when the generator
    is exhausted or closed.
    """
    with open(file_name) as source:
        for raw_line in source:
            record = json_wrapper.loads(raw_line)
            yield record
Ejemplo n.º 23
0
conn = MySQLdb.connect(host="localhost",
                       user="******",
                       passwd="Mdb4Learn",
                       db="eLearning")
for term in terms:
    term_id = term.split('-')[1]
    course_id = term.split('-')[0]
    table_name = ('HKUSTx-' + term + '-problem_set').replace('.', '_').replace(
        '-', '_')
    create_table(conn, table_name)
    cursor = conn.cursor()
    data = None
    problem_sets = {}
    with open(dir + 'HKUSTx-' + term + '-course_structure-prod-analytics.json',
              'r') as infile:
        data = json_wrapper.loads(infile.read())
    for key in data:
        if 'graded' in data[key]['metadata'] and data[key]['metadata'][
                'graded']:
            # xml_id = key.split('@')[-1]
            set_category = data[key]['metadata']['format']
            set_name = data[key]['metadata']['display_name']
            queue = Queue.Queue()
            for child in data[key]['children']:
                queue.put_nowait(child)
            while not queue.empty():
                id = queue.get_nowait()
                category = data[id]['category']
                if category in ['problem', 'openassessment']:
                    if '@' in id:
                        xml_id = id.split('@')[-1]
Ejemplo n.º 24
0
f = open(
    "/Users/maomaoyu/Downloads/e-learning/HKUSTx/hkustx-2016-09-11/HKUSTx-COMP102.1x-2T2015-prod.mongo"
)
of1 = open(
    '/Users/maomaoyu/Downloads/e-learning/HKUSTx/hkustx-2016-09-11/commentthread.txt',
    'w')
of2 = open(
    '/Users/maomaoyu/Downloads/e-learning/HKUSTx/hkustx-2016-09-11/comment.txt',
    'w')
line = f.readline()
# types = []
comment_text = []
commentthread_text = []
while line:
    obj = json_wrapper.loads(line)
    # print(obj['_type'])
    # threadtype
    if obj['_type'] == "CommentThread":
        commentthread_text = [
            obj['_id']['$oid'], obj['votes']['up_count'],
            obj['votes']['down_count'], obj['votes']['count'],
            obj['votes']['point'], obj['thread_type'], obj['comment_count'],
            obj['title'], obj['body'], obj['updated_at']['$date'],
            obj['created_at']['$date'], obj['last_activity_at']['$date']
        ]
        # for i in range(0, len(commentthread_text)):
        #     commentthread_text[i] = str(commentthread_text[i])
        commentthread_text = [str(x) for x in commentthread_text]
        output = '\t'.join(commentthread_text)
        output = output.replace('\n', '\\n')
Ejemplo n.º 25
0
        #         print(row)
        # continue
        if row['key'] == 'pageview':
            try:
                coursera.insert_table(conn, [
                    'user_name', 'page_url', '`timestamp`', '`key`',
                    '`session`'
                ], [
                    row['username'], row['page_url'], row['timestamp'],
                    row['key'], row['session']
                ], table_name)
            except Exception:
                print(row['timestamp'])
                raise
        elif row['key'] == 'user.video.lecture.action':
            value = json_wrapper.loads(row['value'])
            try:
                coursera.insert_table(conn, [
                    'user_name', 'page_url', '`timestamp`', '`key`',
                    '`session`', 'action_type', 'prev_time', 'cur_time',
                    'playback_rate'
                ], [
                    row['username'], row['page_url'], row['timestamp'],
                    row['key'], row['session'], value['type'],
                    value['prevTime'], value['currentTime'],
                    value['playbackRate']
                ], table_name)
            except Exception:
                print(row['timestamp'])
                raise
def query_data_type():
    """Return every registered data type.

    Returns:
        A list of dicts with keys ``id``, ``title`` (passed through
        ``json_wrapper.encode(..., 'utf-8')``) and ``attrs`` (parsed with
        ``json_wrapper.loads``).
    """
    session = Session()
    try:
        rows = session.query(DataTypeInfo.id, DataTypeInfo.title, DataTypeInfo.attrs)
        return [
            {'id': info[0], 'title': json_wrapper.encode(info[1], 'utf-8'), 'attrs': json_wrapper.loads(info[2])}
            for info in rows
        ]
    finally:
        # Release the session even when the query or a conversion fails.
        session.close()
 def to_dict(self):
     """Return this record as a dict with ``id``, ``title`` and ``attrs`` keys.

     ``title`` is passed through ``json_wrapper.encode(..., 'utf-8')`` and
     ``attrs`` is parsed with ``json_wrapper.loads``.
     """
     record_id = self.id
     title = json_wrapper.encode(self.title, 'utf-8')
     attrs = json_wrapper.loads(self.attrs)
     return {'id': record_id, 'title': title, 'attrs': attrs}