Ejemplo n.º 1
0
def test(classifyFile, testres, lamda):
    """Score the held-out records in ``testSet`` and write them to ``testres``.

    For every record the function accumulates, per class ('1' and '2'), the
    log of additively smoothed count ratios over the selected features, adds
    the smoothed log class prior, and converts the two log scores into a
    normalised score for class '1'.  This looks like a two-class naive Bayes
    posterior, presuming ``cnt(idx, cls, value)`` returns the number of
    training records of class ``cls`` whose feature ``idx`` equals ``value``
    (confirm against ``cnt``).

    Args:
        classifyFile: Unused; kept so existing callers keep working.
        testres: Path of the output file.  One line is written per scored
            record: ``<score for class '1'>\\t<real class>\\n``.
        lamda: Additive smoothing constant added to every count.

    Records in which none of the selected features contributes are skipped
    and produce no output line.
    """
    # Only the first column (feature id) of the ranking file is used; blank
    # lines are ignored instead of raising IndexError.
    feaId = []
    with open("D:/worktmp/people/Gap.txt") as gapFile:
        for line in gapFile:
            fields = line.split()
            if fields:
                feaId.append(fields[0])

    # The first FEA_USED_NUM listed features, plus feature '5', are scored.
    feasUsed = feaId[0:FEA_USED_NUM]
    feasUsed.append('5')

    # The context manager guarantees the results are flushed and the handle
    # is closed even if scoring raises part-way through.
    with open(testres, 'w+') as outputFile:
        for count, line in enumerate(testSet, 1):
            # Progress report: "<records seen> <records total>".
            print("%d %d" % (count, len(testSet)))

            p1_log = 0
            p2_log = 0
            changed = False

            record = line2Record(line, '1')
            real = record[0]
            vector = record[1]

            for idx in feasUsed:
                if idx not in vector:
                    continue
                value = vector[idx]

                # NOTE(review): assumes cnt() and distinctNums() are pure
                # lookups, so each is evaluated once per feature.
                n1 = cnt(idx, '1', value)
                n2 = cnt(idx, '2', value)
                c1 = n1 / float(cntClass['1'])
                c2 = n2 / float(cntClass['2'])
                # Ignore values seen in both classes but with a relative
                # frequency below FEA_THRESHOLD in each of them.
                if 0 < c1 < FEA_THRESHOLD and 0 < c2 < FEA_THRESHOLD:
                    continue

                smoothing = lamda * distinctNums(idx, True)
                p1_log += (math.log(n1 + lamda)
                           - math.log(cntClass['1'] + smoothing))
                p2_log += (math.log(n2 + lamda)
                           - math.log(cntClass['2'] + smoothing))
                changed = True

            if not changed:
                continue

            # Smoothed log class priors.
            priorTotal = sum(cntClass.values()) + len(cntClass) * lamda
            p1_log += math.log(cntClass['1'] + lamda) - math.log(priorTotal)
            p2_log += math.log(cntClass['2'] + lamda) - math.log(priorTotal)

            # p1 = 1 / (1 + exp(p2_log - p1_log)).  math.exp raises
            # OverflowError for very large arguments; in that case class '2'
            # dominates completely and the score for class '1' is 0.
            try:
                p1 = 1 / (math.exp(p2_log - p1_log) + 1)
            except OverflowError:
                p1 = 0.0

            outputFile.write(str(p1) + '\t' + real + '\n')
Ejemplo n.º 2
0
def test(classifyFile, testres, lamda):
  """Score the held-out records in ``testSet`` and write them to ``testres``.

  For every record the function accumulates, per class ('1' and '2'), the
  log of additively smoothed count ratios over the selected features, adds
  the smoothed log class prior, and converts the two log scores into a
  normalised score for class '1'.  This looks like a two-class naive Bayes
  posterior, presuming ``cnt(idx, cls, value)`` returns the number of
  training records of class ``cls`` whose feature ``idx`` equals ``value``
  (confirm against ``cnt``).

  Args:
    classifyFile: Unused; kept so existing callers keep working.
    testres: Path of the output file.  One line is written per scored
      record: ``<score for class '1'>\\t<real class>\\n``.
    lamda: Additive smoothing constant added to every count.

  Records in which none of the selected features contributes are skipped
  and produce no output line.
  """
  # Only the first column (feature id) of the ranking file is used; blank
  # lines are ignored instead of raising IndexError.
  feaId = []
  with open("D:/worktmp/people/Gap.txt") as gapFile:
    for line in gapFile:
      fields = line.split()
      if fields:
        feaId.append(fields[0])

  # The first FEA_USED_NUM listed features, plus feature '5', are scored.
  feasUsed = feaId[0:FEA_USED_NUM]
  feasUsed.append('5')

  # The context manager guarantees the results are flushed and the handle
  # is closed even if scoring raises part-way through.
  with open(testres, 'w+') as outputFile:
    for count, line in enumerate(testSet, 1):
      # Progress report: "<records seen> <records total>".
      print("%d %d" % (count, len(testSet)))

      p1_log = 0
      p2_log = 0
      changed = False

      record = line2Record(line, '1')
      real = record[0]
      vector = record[1]

      for idx in feasUsed:
        if idx not in vector:
          continue
        value = vector[idx]

        # NOTE(review): assumes cnt() and distinctNums() are pure lookups,
        # so each is evaluated once per feature.
        n1 = cnt(idx, '1', value)
        n2 = cnt(idx, '2', value)
        c1 = n1 / float(cntClass['1'])
        c2 = n2 / float(cntClass['2'])
        # Ignore values seen in both classes but with a relative frequency
        # below FEA_THRESHOLD in each of them.
        if 0 < c1 < FEA_THRESHOLD and 0 < c2 < FEA_THRESHOLD:
          continue

        smoothing = lamda * distinctNums(idx, True)
        p1_log += math.log(n1 + lamda) - math.log(cntClass['1'] + smoothing)
        p2_log += math.log(n2 + lamda) - math.log(cntClass['2'] + smoothing)
        changed = True

      if not changed:
        continue

      # Smoothed log class priors.
      priorTotal = sum(cntClass.values()) + len(cntClass) * lamda
      p1_log += math.log(cntClass['1'] + lamda) - math.log(priorTotal)
      p2_log += math.log(cntClass['2'] + lamda) - math.log(priorTotal)

      # p1 = 1 / (1 + exp(p2_log - p1_log)).  math.exp raises OverflowError
      # for very large arguments; in that case class '2' dominates
      # completely and the score for class '1' is 0.
      try:
        p1 = 1 / (math.exp(p2_log - p1_log) + 1)
      except OverflowError:
        p1 = 0.0

      outputFile.write(str(p1) + '\t' + real + '\n')
Ejemplo n.º 3
0
def train(trainFile, classIdx, sparse):
    """Accumulate class and feature-value counts from ``trainFile``.

    Lines are numbered from 1.  A line whose number satisfies
    ``count % NUM_ALL == NUM_TEST`` is appended unparsed to the module-level
    ``testSet`` and takes no part in the counts.  Every other line is parsed
    with ``line2Record(line, classIdx)`` and updates:

    * ``cntClass[cls]`` - number of training records of class ``cls``;
    * ``cntFeas[feature][cls][value]`` - number of training records of class
      ``cls`` whose ``feature`` has that ``value``.

    Args:
        trainFile: Path of the file to read, one record per line.
        classIdx: Passed through to ``line2Record``.
        sparse: Unused; kept so existing callers keep working.
    """
    # The context manager closes the file even if parsing raises.
    with open(trainFile) as trainLines:
        for count, line in enumerate(trainLines, 1):
            # Progress report: number of lines read so far.
            print(count)

            if count % NUM_ALL == NUM_TEST:
                testSet.append(line)
                continue

            record = line2Record(line, classIdx)
            clas = record[0]

            # Every record carries weight 1 regardless of its class (an
            # earlier experiment weighted class '2' by 4).
            cntClass[clas] = cntClass.get(clas, 0) + 1
            for key, value in record[1].items():
                valueCounts = cntFeas.setdefault(key, {}).setdefault(clas, {})
                valueCounts[value] = valueCounts.get(value, 0) + 1
Ejemplo n.º 4
0
def train(trainFile, classIdx, sparse):
  """Accumulate class and feature-value counts from ``trainFile``.

  Lines are numbered from 1.  A line whose number satisfies
  ``count % NUM_ALL == NUM_TEST`` is appended unparsed to the module-level
  ``testSet`` and takes no part in the counts.  Every other line is parsed
  with ``line2Record(line, classIdx)`` and updates:

  * ``cntClass[cls]`` - number of training records of class ``cls``;
  * ``cntFeas[feature][cls][value]`` - number of training records of class
    ``cls`` whose ``feature`` has that ``value``.

  Args:
    trainFile: Path of the file to read, one record per line.
    classIdx: Passed through to ``line2Record``.
    sparse: Unused; kept so existing callers keep working.
  """
  # The context manager closes the file even if parsing raises.
  with open(trainFile) as trainLines:
    for count, line in enumerate(trainLines, 1):
      # Progress report: number of lines read so far.
      print(count)

      if count % NUM_ALL == NUM_TEST:
        testSet.append(line)
        continue

      record = line2Record(line, classIdx)
      clas = record[0]

      # Every record carries weight 1 regardless of its class (an earlier
      # experiment weighted class '2' by 4).
      cntClass[clas] = cntClass.get(clas, 0) + 1
      for key, value in record[1].items():
        valueCounts = cntFeas.setdefault(key, {}).setdefault(clas, {})
        valueCounts[value] = valueCounts.get(value, 0) + 1
Ejemplo n.º 5
0
def makeDataSet(trainFile, cIdx):
  """Parse every line of ``trainFile`` into a record.

  Args:
    trainFile: Path of the file to read, one record per line.
    cIdx: Passed through to ``line2Record`` for each line.

  Returns:
    A list with the result of ``line2Record(line, cIdx)`` for each line, in
    file order.
  """
  # The context manager closes the file even if line2Record raises.
  with open(trainFile) as dataLines:
    return [line2Record(line, cIdx) for line in dataLines]