Ejemplo n.º 1
0
    # -o/--outputFile: name of the file the resulting dictionary is saved to.
    optparser.add_option('-o',
                         '--outputFile',
                         dest='output',
                         help='fileName',
                         default=None)

    (options, args) = optparser.parse_args()

    # Input source: the file named by options.input, or sys.stdin when the
    # option is absent. The option is either None or not None, so a third
    # "else" branch could never run and has been dropped.
    if options.input is None:
        inFile = sys.stdin
    else:
        inFile = ioFile.dataFromFile(options.input)

    # Output name: explicit option, or the built-in default.
    if options.output is None:
        outFile = "arxiv-category_dict.pkl"
    else:
        outFile = options.output

    data_iterator = inFile
    category_dict = {}

    # Each line holds tab-separated fields: field 0 becomes the key and
    # field 1 the value. Later lines overwrite earlier ones with the same key.
    for line in data_iterator:
        line = line.rstrip('\n')
        if not line:
            # A blank line (e.g. a trailing newline at end of input) has no
            # second field and would otherwise raise IndexError.
            continue
        fields = line.split('\t')
        category_dict[fields[0]] = fields[1]

    ioFile.save_object(category_dict, outFile)
Ejemplo n.º 2
0
                      help='fileName',
                      default=None)
     
 (options, args) = optparser.parse_args()

 # Source handed to ioFile.load_object: options.input when given, otherwise
 # sys.stdin.
 # NOTE(review): sys.stdin is a file object while options.input is a file
 # name -- confirm ioFile.load_object accepts both.
 fname = sys.stdin if options.input is None else options.input

 # The term count is required. The previous code assigned sys.stdin to
 # n_all_term when the option was missing (a copy-paste slip) and kept the
 # intended error exit in a branch that could never run.
 if options.num is None:
     sys.exit('No number of conversion specified, system will exit')
 n_all_term = int(options.num)

 # Output name: explicit option, or the built-in default.
 outFile = 'convert_prob.pkl' if options.output is None else options.output

 prob = ioFile.load_object(fname)
 convert_prob = convertProb(prob, n_all_term)
 ioFile.save_object(convert_prob, outFile)
Ejemplo n.º 3
0
if __name__ == "__main__":
    # Command line: -f/--inputFile names the input, -o/--outputFile names
    # the file the result is saved to. Both default to None.
    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='fileName',
                         default=None)
    optparser.add_option('-o', '--outputFile',
                         dest='output',
                         help='fileName',
                         default=None)
    (options, args) = optparser.parse_args()

    # Fall back to sys.stdin when no input was named. The former third
    # "else" branch was unreachable (the value is either None or not None).
    # NOTE(review): sys.stdin is a file object while options.input is a file
    # name -- confirm readProb accepts both.
    fname = sys.stdin if options.input is None else options.input

    # Output name: explicit option, or the built-in default.
    outFile = 'prob.pkl' if options.output is None else options.output

    prob = readProb(fname)

    ioFile.save_object(prob, outFile)
Ejemplo n.º 4
0
                         default=None)
    # -o/--outputFile: name of the file the resulting dictionary is saved to.
    optparser.add_option('-o', '--outputFile',
                         dest='output',
                         help='fileName',
                         default=None)

    (options, args) = optparser.parse_args()

    # Read from the file named by options.input, or from sys.stdin when the
    # option is absent. A third "else" branch was unreachable and is gone.
    if options.input is None:
        inFile = sys.stdin
    else:
        inFile = ioFile.dataFromFile(options.input)

    # Output name: explicit option, or the built-in default.
    outFile = (options.output if options.output is not None
               else "arxiv-category_dict.pkl")

    data_iterator = inFile
    category_dict = {}

    # Lines are tab-separated: field 0 is the key, field 1 the value.
    for line in data_iterator:
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines; they have no second field and would raise
            # IndexError below.
            continue
        fields = line.split('\t')
        category_dict[fields[0]] = fields[1]

    ioFile.save_object(category_dict, outFile)
Ejemplo n.º 5
0
    # Command line: -f/--inputFile names the XML input, -o/--outputFile names
    # the file the result is saved to. Both default to None.
    optparser = OptionParser()
    optparser.add_option('-f',
                         '--inputFile',
                         dest='input',
                         help='filename containing xml',
                         default=None)
    optparser.add_option('-o',
                         '--outputFile',
                         dest='output',
                         help='fileName',
                         default=None)

    (options, args) = optparser.parse_args()

    # Fall back to sys.stdin when no input was named. The option is either
    # None or not None, so the former third "else" branch never ran.
    # NOTE(review): sys.stdin is a file object while options.input is a file
    # name -- confirm acmClassification accepts both.
    fname = sys.stdin if options.input is None else options.input

    # Output name: explicit option, or the built-in default.
    outFile = "acm-class_dict.pkl" if options.output is None else options.output

    acm_class_dict = acmClassification(fname)

    ioFile.save_object(acm_class_dict, outFile)
Ejemplo n.º 6
0
    # -f/--inputFile names the XML input; -o/--outputFile names the file the
    # result is saved to. Both default to None.
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='filename containing xml',
                         default=None)
    optparser.add_option('-o', '--outputFile',
                         dest='output',
                         help='fileName',
                         default=None)

    (options, args) = optparser.parse_args()

    # Use sys.stdin when no input was named. The previous if/elif/else had an
    # "else" that could never execute; a plain two-way choice is equivalent.
    # NOTE(review): sys.stdin is a file object while options.input is a file
    # name -- confirm acmClassification accepts both.
    if options.input is None:
        fname = sys.stdin
    else:
        fname = options.input

    # Output name: explicit option, or the built-in default.
    if options.output is None:
        outFile = "acm-class_dict.pkl"
    else:
        outFile = options.output

    acm_class_dict = acmClassification(fname)

    ioFile.save_object(acm_class_dict, outFile)
    
    

Ejemplo n.º 7
0
            fname_f = sys.stdin
    elif options.input_f is not None:
            fname_f = options.input_f
    else:
            print 'No filename(.pkl) specified, system with exit\n'
            sys.exit('System will exit')

    # Second input: options.input_g when given, otherwise sys.stdin. The
    # former third "else" branch was unreachable and has been removed.
    # NOTE(review): sys.stdin is a file object while options.input_g is a
    # file name -- confirm ioFile.load_object accepts both.
    fname_g = sys.stdin if options.input_g is None else options.input_g

    # Output name: explicit option, or the built-in default.
    outFile = 'distance.pkl' if options.output is None else options.output

    prob_f = ioFile.load_object(fname_f)
    prob_g = ioFile.load_object(fname_g)

    all_distance, count = distanceBetweenTwoYears(prob_f, prob_g)

    # Same text as the old "print count, len(all_distance)" statement, but
    # written so that it is valid on both Python 2 and Python 3.
    print('%s %s' % (count, len(all_distance)))

    ioFile.save_object(all_distance, outFile)
    
Ejemplo n.º 8
0
    # Source handed to ioFile.load_object: options.prob when given, otherwise
    # sys.stdin. The former third "else" branch could never run.
    # NOTE(review): sys.stdin is a file object while options.prob is a file
    # name -- confirm ioFile.load_object accepts both.
    inFile = sys.stdin if options.prob is None else options.prob

    # The vocabulary is optional; without it topNTerm receives None.
    if options.vocabulary is not None:
        fname = options.vocabulary
        all_term = allTerm(fname)
    else:
        fname = None
        all_term = None

    # Output name: explicit option, or the built-in default.
    outFile = 'topic.pkl' if options.output is None else options.output

    prob = ioFile.load_object(inFile)

    # prob is unpacked as a two-dimensional (nTopic, nTerm) shape. Each row
    # is reshaped to a column of nTerm entries before being passed on.
    # NOTE(review): the literals 5 and 1 are passed straight to topNTerm;
    # their meaning is defined there -- presumably 5 is the number of terms.
    nTopic, nTerm = prob.shape
    all_topic = [
        topNTerm(5, prob[i, :].reshape(nTerm, 1), 1, all_term)
        for i in range(nTopic)
    ]

    ioFile.save_object(all_topic, outFile)
Ejemplo n.º 9
0
            else:
                print 'Name of the category is incorrect, system with exit\n'
                sys.exit('System will exit')                    
            
    # A class filename is mandatory: report and abort when it is missing.
    if options.clf is None:
        print('No class filename specified, system with exit\n')
        sys.exit('System will exit')
    classDir = options.clf

    # The class-dict file is mandatory as well; it is loaded right away.
    if options.clf_dict is None:
        print('No class-dict filename specified, system with exit\n')
        sys.exit('System will exit')
    clf_dict = ioFile.load_object(options.clf_dict)

    # An explicit output name wins; otherwise the default depends on fun.
    # NOTE(review): with no output option and fun outside (0, 1), outFile is
    # never assigned here -- confirm earlier code restricts fun to 0 or 1.
    if options.output is not None:
        outFile = options.output
    elif fun == 0:
        outFile = "class_topic_arxiv-category.pkl"
    elif fun == 1:
        outFile = "class_topic_acm-class.pkl"

    all_clf_topic = topicOfClassificationForAllYear(
        probDir, modelDir, classDir, clf_dict, fun)

    ioFile.save_object(all_clf_topic, outFile)

    
    
    
Ejemplo n.º 10
0
if __name__ == "__main__":
    # Command line: -f/--inputFile names the input, -o/--outputFile names
    # the file the result is saved to. Both default to None.
    optparser = OptionParser()
    optparser.add_option('-f',
                         '--inputFile',
                         dest='input',
                         help='fileName',
                         default=None)
    optparser.add_option('-o',
                         '--outputFile',
                         dest='output',
                         help='fileName',
                         default=None)
    (options, args) = optparser.parse_args()

    # Use sys.stdin when no input was named. The option is either None or
    # not None, so the old third "else" branch was dead code.
    # NOTE(review): sys.stdin is a file object while options.input is a file
    # name -- confirm readProb accepts both.
    if options.input is None:
        fname = sys.stdin
    else:
        fname = options.input

    # Output name: explicit option, or the built-in default.
    if options.output is None:
        outFile = 'prob.pkl'
    else:
        outFile = options.output

    prob = readProb(fname)

    ioFile.save_object(prob, outFile)
Ejemplo n.º 11
0
    # Each input falls back to sys.stdin when its option is absent. The old
    # if/elif/else chains ended in "else" branches that could never run.
    # NOTE(review): when both options are omitted, both load_object calls
    # below receive the same sys.stdin object -- confirm this is intended and
    # that ioFile.load_object accepts a file object as well as a file name.
    fname_f = sys.stdin if options.input_f is None else options.input_f
    fname_g = sys.stdin if options.input_g is None else options.input_g

    # Output name: explicit option, or the built-in default.
    outFile = 'distance.pkl' if options.output is None else options.output

    prob_f = ioFile.load_object(fname_f)
    prob_g = ioFile.load_object(fname_g)

    all_distance, count = distanceBetweenTwoYears(prob_f, prob_g)

    # Same text as the old "print count, len(all_distance)" statement, but
    # written so that it is valid on both Python 2 and Python 3.
    print('%s %s' % (count, len(all_distance)))

    ioFile.save_object(all_distance, outFile)