Ejemplo n.º 1
0
#!/usr/bin/python
# -*- coding: utf-8 -*- #

from wfMySQL import MySQLConnector
from filter import dimension
from wfDatabase import wfdb
import codecs

# Instantiate the MySQL connector
mysql = MySQLConnector()

# Instantiate the database-list helper class
obj_db = wfdb()

# Fetch the list of category/segment view names from the helper
view_list = obj_db.getViewList()


def generateGexf(list_terms, category, chi_kind):
    len_term = len(list_terms)
    outfile = codecs.open('gexf/' + chi_kind + '_' + str(category) + '.gexf', 'w', 'utf-8')
    outfile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    outfile.write('<gexf xmlns="http://www.gexf.net/1.2draft" version="1.2">\n')
    outfile.write('<graph mode="static" defaultedgetype="undirected">\n')
    outfile.write('<nodes>\n')
    for i in range(len_term):
        outfile.write('<node id="%s" label="%s" />\n' % (str(i), list_terms[i]))
    outfile.write('</nodes>\n')
    x = 0
    outfile.write('<edges>\n')
    for i in range(5):
        print 'Building edge in category-"' + str(category) + '", Dataset: ' + view_list[i]
Ejemplo n.º 2
0
# Get confidence interval
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean of *data* and its Student-t confidence interval.

    Relies on the module-level names ``np`` and ``sp`` being in scope;
    ``sp`` must expose ``sem`` and ``t`` (presumably ``scipy.stats`` --
    confirm against the file's imports).

    Args:
        data: Sequence of numeric samples.
        confidence: Two-sided confidence level (0.95 by default).

    Returns:
        A list ``[m, h, m - h, m + h]`` where ``m`` is the mean and ``h`` is
        the interval half-width: the standard error of the mean multiplied
        by the t critical value for ``n - 1`` degrees of freedom.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), sp.sem(a)
    # Use the public ``ppf`` instead of the private ``_ppf``: it is the
    # documented API and validates its arguments before evaluating.
    h = se * sp.t.ppf((1 + confidence) / 2., n - 1)
    return [m, h, m - h, m + h]

# Instantiate the dimension helper class
obj_dims = dimension()

# Instantiate the database-list helper class
obj_db = wfdb()

# Instantiate the MySQL connector class
mysql = MySQLConnector()

# Get the average CHI score collection
# (presumably a dict, per the variable name -- confirm against dimension)
dict_avg_chi = obj_dims.getAvgCHIList()

# Get the maximum CHI score collection
dict_max_chi = obj_dims.getMaxCHIList()

# Filter each CHI score collection by its threshold.
# avg_filter_threshold / max_filter_threshold are not defined in this
# section and must already be bound before these lines run.
dict_flted_avgchi = obj_dims.doFilteredList(avg_filter_threshold, dict_avg_chi)
dict_flted_maxchi = obj_dims.doFilteredList(max_filter_threshold, dict_max_chi)

# Get the list of TF-IDF database views
tfidf_viewlist = obj_db.getTFIDFViewList()

for source_db in range(5):
Ejemplo n.º 3
0
# Get most common element in list
def most_common(list_term):
    """Return the element that occurs most often in *list_term*.

    When several elements share the highest count, the one that comes
    first in the counter's iteration order is returned.

    Args:
        list_term: Iterable of hashable items.

    Returns:
        The item with the highest number of occurrences.

    Raises:
        ValueError: If *list_term* is empty.
    """
    # Imported locally so the function does not depend on the file's
    # top-level import block.
    from collections import Counter

    counts = Counter(list_term)
    # Iterating the keys with ``counts.get`` as the sort key avoids the
    # Python-2-only ``dict.iteritems()`` and works on both Python 2 and 3.
    return max(counts, key=counts.get)

# Instantiate the dimension helper class
obj_dims = dimension()

# Instantiate the database-list helper class
obj_db = wfdb()

# Instantiate the MySQL connector class
mysql = MySQLConnector()

# Get the average CHI score collection
# (presumably a dict, per the variable name -- confirm against dimension)
dict_avg_chi = obj_dims.getAvgCHIList()

# Get the maximum CHI score collection
dict_max_chi = obj_dims.getMaxCHIList()

# Filter each CHI score collection by its threshold.
# avg_filter_threshold / max_filter_threshold are not defined in this
# section and must already be bound before these lines run.
dict_flted_avgchi = obj_dims.doFilteredList(avg_filter_threshold, dict_avg_chi)
dict_flted_maxchi = obj_dims.doFilteredList(max_filter_threshold, dict_max_chi)

# Build new dictionaries keyed by the filtered entries, with every value
# reset to 0.0
dict_avg_zero = dict.fromkeys(dict_flted_avgchi, 0.0)
dict_max_zero = dict.fromkeys(dict_flted_maxchi, 0.0)
Ejemplo n.º 4
0
    new_category = []
    for i in random_list:
        new_feature.append(feature_dataset[i])
        new_category.append(category_dataset[i])
    feature_dataset[:] = []
    category_dataset[:] = []
    return new_feature, new_category

# Instantiate the dimension helper class
obj_dims = dimension()

# Instantiate the database-list helper class
obj_db = wfdb()

# Instantiate the MySQL connector class
mysql = MySQLConnector()

# Get the average CHI score collection
avg_chi_list = obj_dims.getAvgCHIList()

# Get the maximum CHI score collection
max_chi_list = obj_dims.getMaxCHIList()

# Filter each collection by CHI score.
# avg_filter_num / max_filter_num are not defined in this section and
# must already be bound before these lines run.
filted_avg_list = obj_dims.doFilteredList(avg_filter_num, avg_chi_list)
filted_max_list = obj_dims.doFilteredList(max_filter_num, max_chi_list)

# Get the list of TF-IDF database views
tfidf_viewlist = obj_db.getTFIDFViewList()

# Length of vector dimension
Ejemplo n.º 5
0
#!/usr/bin/python
# -*- coding: utf-8 -*- #

from wfMySQL import MySQLConnector
from wfDatabase import wfdb
import codecs
import re
import enchant

# Number of terms to retrieve
limit = 300

# Instantiate the MySQL connector
mysql = MySQLConnector()

# Instantiate the database-list helper class
obj_db = wfdb()

# Raw strings keep the regex escapes (\w, \d) out of Python's own
# string-escape processing; the compiled patterns are unchanged.
# ptn1: the whole term is made of word characters (\w already includes
# digits, so the extra \d is redundant). The empty string also matches.
ptn1 = re.compile(r"^[\w\d]*$")
# ptn2: the whole term is made of digits only. The empty string also matches.
ptn2 = re.compile(r"^[\d]*$")
# en_US enchant dictionary; verifyEngNum queries it through dic.check()
dic = enchant.Dict("en_US")


def verifyEngNum(term):
    if ptn1.match(term):
        if ptn2.match(term):
            return 1
        else:
            if dic.check(term):
                return 2
            else:
                return 3
Ejemplo n.º 6
0
#!/usr/bin/python
# -*- coding: utf-8 -*- #

from wfMySQL import MySQLConnector

# Instantiate the database connector
mysql = MySQLConnector()


def pairReliablility(user1, user2, coder_clsfi):
    """Print the pairwise agreement between two coders.

    Counts the samples in *coder_clsfi* on which *user1* and *user2* made
    the same classification (``M``) and prints the ratio
    ``2M / (N1 + N2)``, with both ``N1`` and ``N2`` taken as the total
    number of samples.

    Args:
        user1: Key (or index) of the first coder inside each sample entry.
        user2: Key (or index) of the second coder inside each sample entry.
        coder_clsfi: Mapping whose values support ``value[user1]`` and
            ``value[user2]`` (presumably sample number -> per-coder
            classification -- confirm against the code that builds it).

    Returns:
        None. The statistics are written to standard output.
    """
    len_n = len(coder_clsfi)
    # Only the per-sample entries are needed, not the sample keys.
    M = sum(1 for value in coder_clsfi.values()
            if value[user1] == value[user2])
    # With no samples the ratio is undefined; report 0.0 instead of
    # raising ZeroDivisionError.
    rebilty = (2.0 * M) / (2.0 * len_n) if len_n else 0.0
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    # NOTE(review): the middle of this line was garbled in the original
    # ("User: "******", "); it is restored here as str(user1).
    print("User: " + str(user1) + ", " + str(user2))
    print("M (Number of totally agreement): " + str(M))
    print("N1,N2 (Should agree with number): " + str(len_n))
    print("Mutual consent degree = 2M/(N1+N2): " + str(rebilty) + "\n\n")

# Select the sampling number, first classification and user id of every
# CoderCompare row, ordered by sampling number ascending
sql_get = "SELECT `SamplingNo`, `ClsNo1`, `UserId` FROM `CoderCompare` ORDER BY `CoderCompare`.`SamplingNo` ASC"

# Run the query through the connector and keep the returned rows
result = mysql.queryrows(sql_get)

# Starts empty; presumably filled by the row loop that follows -- confirm
coder_clsfi = {}

for row in result:
    sno = int(row[0])
    cls = int(row[1])
    user = int(row[2])