) x += 1 outfile.write("</edges>\n") outfile.write("</graph></gexf>\n") outfile.close() # Get TFIDF table list tfidf_tblist = obj_db.getTFIDFTableList() # Get Segmented table list seg_tblist = obj_db.getTableList() print seg_tblist for i in range(5): dict_tfidf = {} results = mysql.queryrows("SELECT * FROM `" + tfidf_tblist[i] + "`") for row in results: list_pairs = row[1].split("|") for pair in list_pairs: term_tfidf = pair.split("@") term = term_tfidf[0] tfidf = float(term_tfidf[1]) if term in dict_tfidf: continue else: dict_tfidf[term] = tfidf # Remove number and not english word in dictionary for key, val in dict_tfidf.items(): if verifyEngNum(key) == 1 or verifyEngNum(key) == 3: dict_tfidf.pop(key)
continue else: # Init ang reset feature and category dataset avg_feature = [] avg_category = [] max_feature = [] max_category = [] # Rest All value to 0.0 in dictionary dict_avg_zero = dict.fromkeys(dict_flted_avgchi, 0.0) dict_max_zero = dict.fromkeys(dict_flted_maxchi, 0.0) sql_getContent = "SELECT `ClsNo1`, `ScoreContent` FROM `" + tfidf_viewlist[view_num] + "`" # Get All result view_result = mysql.queryrows(sql_getContent) for view_row in view_result: view_content = view_row[1] term_tfidf_list = view_content.split("|") avg_tfidf_score_vactor = getTFIDFScoreVector(dict_avg_zero, term_tfidf_list) max_tfidf_score_vextor = getTFIDFScoreVector(dict_max_zero, term_tfidf_list) avg_feature.append(avg_tfidf_score_vactor) avg_category.append(int(view_row[0])) max_feature.append(max_tfidf_score_vextor) max_category.append(int(view_row[0])) print 'AVG Feature dataset: ' + str(len(avg_feature)) print 'AVG Category dataset: ' + str(len(avg_category)) print 'MAX Feature dataset: ' + str(len(max_feature))
def pairReliablility(user1, user2, coder_clsfi):
    """Print the pairwise agreement between two coders.

    Counts the samples on which both coders assigned the same class (M) and
    prints 2M / (N1 + N2), where N1 and N2 are both taken to be the total
    number of samples in ``coder_clsfi``.

    Args:
        user1: Key of the first coder inside each per-sample mapping.
        user2: Key of the second coder inside each per-sample mapping.
        coder_clsfi: Mapping of sample number -> {coder key: class number}.

    Returns:
        None. The result is only printed.

    Raises:
        KeyError: If any sample has no entry for ``user1`` or ``user2``.
    """
    len_n = len(coder_clsfi)

    # M = number of samples on which both coders chose the same class.
    M = sum(
        1
        for value in coder_clsfi.values()
        if value[user1] == value[user2]
    )

    # With no samples there is nothing to agree on; report 0.0 instead of
    # raising ZeroDivisionError.
    if len_n:
        rebilty = (2.0 * M) / (2.0 * len_n)
    else:
        rebilty = 0.0

    # Parenthesised single-argument prints behave the same under the
    # Python 2 print statement used by the rest of this script.
    print("User: " + str(user1) + ", " + str(user2))
    print("M (Number of totally agreement): " + str(M))
    print("N1,N2 (Should agree with number): " + str(len_n))
    print("Mutual consent degree = 2M/(N1+N2): " + str(rebilty) + "\n\n")


# Load every (sample, class, coder) row, ordered by sample number.
sql_get = "SELECT `SamplingNo`, `ClsNo1`, `UserId` FROM `CoderCompare` ORDER BY `CoderCompare`.`SamplingNo` ASC"
result = mysql.queryrows(sql_get)

# Group the rows as {sample number: {coder id: class number}}.
coder_clsfi = {}
for row in result:
    sno = int(row[0])
    cls = int(row[1])
    user = int(row[2])
    # setdefault creates the per-sample dict the first time a sample is seen.
    coder_clsfi.setdefault(sno, {})[user] = cls

print(coder_clsfi)