Example #1
def train_and_trial(train_file, test_file, train_parse='', test_parse='', pickled=True, use_dep=False):
    global use_dep_parse
    if use_dep:
        use_dep_parse = True
    if pickled:
        f = open(train_file, 'rb')
        traind = cPickle.load(f)
        f.close()
        f = open(test_file, 'rb')
        testd = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(train_file)
        testd = XMLParser.create_exs(test_file)
    posi_words = semeval_util.get_liu_lexicon('positive-words.txt')
    negi_words = semeval_util.get_liu_lexicon('negative-words.txt')
    print "should really use better dictionary for sentence senti labels"
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    train_sentiment = [senti_classify(sent, posi_words, negi_words) for sent in traind['orig']]

    dep_parses = []
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'], train_parse, dictionary=True, iobs=True)
    chunker = ConsecutiveChunkTagger(zip(traind['iob'],traind['polarity']), senti_dictionary,
                                     train_sentiment, dep_parses)
    print "done training"
    test_sentiment = [senti_classify(sent, posi_words, negi_words) for sent in testd['orig']]
    dep_parses = [[]] * len(test_sentiment)
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(testd['iob'], test_parse, dictionary=True, iobs=True)
    results = []
    for i in range(len(test_sentiment)):
        results.append(chunker.parse((testd['iob'][i], test_sentiment[i], dep_parses[i])))
    return results
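A minimal call sketch for this variant, with hypothetical file names; pickled=True expects the cPickle'd example dictionaries that XMLParser.create_exs produces:

# hypothetical file names for a pickled corpus plus Stanford parse output
results = train_and_trial('lap_train.pkl', 'lap_test.pkl',
                          train_parse='lap_train-parse.txt',
                          test_parse='lap_test-parse.txt',
                          pickled=True, use_dep=True)
print "tagged %d test sentences" % len(results)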
Example #2
def train_and_trial(trn_file,
                    test_file,
                    clf,
                    posit_lex_file='positive-words.txt',
                    nega_lex_file='negative-words.txt',
                    pickled=False):
    """ Train on the training file and test on the testing file,
    given a classifier, for the aspect extraction task.
    """
    if pickled:
        f = open(trn_file, 'rb')
        traind = cPickle.load(f)
        f.close()
        f = open(test_file, 'rb')
        testd = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(trn_file)
        testd = XMLParser.create_exs(test_file)
    posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    #chunker = ConsecutiveChunker(traind['iob'], senti_dictionary)
    chunker = clf.train(traind['iob'], senti_dictionary)
    print "done training"

    guessed_iobs = chunker.evaluate(testd['iob'])
    XMLParser.create_xml(testd['orig'], guessed_iobs, testd['id'],
                         testd['idx'], 'trial_answers.xml')
    compute_pr(testd['iob'], guessed_iobs)
Example #3
def train_and_trial(trn_file,
                    test_file,
                    parse_file_train,
                    parse_file_test,
                    use_dep=False,
                    posit_lex_file='positive-words.txt',
                    nega_lex_file='negative-words.txt',
                    pickled=False):
    """ Train on the training file and test on the testing file
    """
    global use_dep_parse
    if use_dep:
        use_dep_parse = True
    if pickled:
        f = open(trn_file, 'rb')
        traind = cPickle.load(f)
        f.close()
        f = open(test_file, 'rb')
        testd = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(trn_file)
        testd = XMLParser.create_exs(test_file)
    #posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    #negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    dep_parses = []
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'],
                                                         parse_file_train,
                                                         dictionary=True,
                                                         iobs=True)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    chunker = ConsecutiveChunker(traind['iob'], testd['iob'], senti_dictionary,
                                 dep_parses)
    print "done training on %d examples" % len(traind['iob'])
    '''
    f = open('learned.pkl','wb')
    cPickle.dump(chunker,f)
    f.close()
    '''
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(testd['iob'],
                                                         parse_file_test,
                                                         dictionary=True,
                                                         iobs=True)

    guessed_iobs = chunker.evaluate([testd['iob'], dep_parses])
    ###semeval_util.compute_pr(testd['iob'], guessed_iobs)
    return guessed_iobs
Example #4
def train_and_test(filename,
                   parse_file,
                   use_deps=False,
                   posit_lex_file='positive-words.txt',
                   nega_lex_file='negative-words.txt'):
    """Creates an 80/20 split of the examples in filename,
    trains the chunker on 80%, and evaluates the learned chunker on 20%.
    """
    global use_dep_parse
    if use_deps:
        use_dep_parse = True
    traind = XMLParser.create_exs(filename)
    dep_parses = []
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'],
                                                         parse_file,
                                                         dictionary=True,
                                                         iobs=True)
    n = len(traind['iob'])
    split_size = int(n * 0.8)
    train = traind['iob'][:split_size]
    test = traind['iob'][split_size:]
    test_deps = []
    if use_dep_parse:
        test_deps = dep_parses[split_size:]
    #Liu not in use for now
    #posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    #negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    chunker = ConsecutiveChunker(train, test, senti_dictionary, dep_parses)
    guessed_iobs = chunker.evaluate([test, test_deps])
    semeval_util.compute_pr(test, guessed_iobs)
Example #5
def K_fold_train_and_test(filename, posit_lex_file='positive-words.txt', nega_lex_file='negative-words.txt', k=2, pickled=False):
    """Does K-fold cross-validation on the given filename
    """
    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    n = len(traind['iob'])
    #posi_words = get_liu_lexicon(posit_lex_file)
    #negi_words = get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    kf = cross_validation.KFold(n, n_folds=k, indices=True)
    tot_p, tot_r, tot_f1 = 0, 0, 0
    for train, test in kf:
        print "next fold, split size: %d/%d" %(len(train), len(test))
        #print train
        train_set = []
        test_set = []
        for i in train:
            train_set.append(traind['iob'][i])
        for i in test:
            test_set.append(traind['iob'][i])
        chunker = ConsecutiveChunker(train_set, senti_dictionary)
        guesses = chunker.evaluate(test_set)
        print test_set
        print guesses
        r, p, f = semeval_util.compute_pr(test_set, guesses)
        tot_p += p
        tot_r += r
        tot_f1 += f
    print "ave Prec: %.2f, Rec: %.2f, F1: %.2f" %(tot_p/float(k), tot_r/float(k), tot_f1/float(k))
Example #6
def train_and_test(filename, parse_file, use_deps=False,
                   posit_lex_file='positive-words.txt', nega_lex_file='negative-words.txt'):
    """Creates an 80/20 split of the examples in filename,
    trains the chunker on 80%, and evaluates the learned chunker on 20%.
    """
    global use_dep_parse
    if use_deps:
        use_dep_parse = True
    traind = XMLParser.create_exs(filename)
    dep_parses = []
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'], parse_file, dictionary=True, iobs=True)
    n = len(traind['iob'])
    split_size = int(n * 0.8)
    train = traind['iob'][:split_size]
    test = traind['iob'][split_size:]
    test_deps = []
    if use_dep_parse:
        test_deps = dep_parses[split_size:]
    #Liu not in use for now
    #posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    #negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    chunker = ConsecutiveChunker(train, test, senti_dictionary, dep_parses)
    guessed_iobs = chunker.evaluate([test,test_deps])
    semeval_util.compute_pr(test, guessed_iobs)
Example #7
def add_dep_parse_features(original,
                           parse_file,
                           pickled=True,
                           dictionary=False,
                           iobs=False):
    """Create the dependency tree dictionaries that we need for each sentence
    in the input corpus.
    Inputs:
    original: pickled version of our dictionary, or the dictionary itself,
    or the original XML file
    """
    if pickled and not dictionary:
        f = open(original, 'rb')
        traind = cPickle.load(f)
        f.close()
    elif dictionary:
        traind = original
    else:
        traind = XMLParser.create_exs(original)
    f = open(parse_file, 'r')
    lines = f.readlines()
    f.close()
    dep_trees = transform_dep_parse(lines)
    senti_dictionary = get_mpqa_lexicon()
    if iobs:
        new_dep_trees = integrate_dep_iob(traind, dep_trees, senti_dictionary)
    else:
        new_dep_trees = integrate_dep_iob(traind['iob'], dep_trees,
                                          senti_dictionary)
    return new_dep_trees
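A hedged usage sketch with hypothetical file names, passing the in-memory dictionary form (dictionary=True skips unpickling) the same way the callers above do:

traind = XMLParser.create_exs('lap_train.xml')  # hypothetical input file
dep_trees = add_dep_parse_features(traind['iob'], 'lap_train-parse.txt',
                                   dictionary=True, iobs=True)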
Example #8
def create_parses_from_dict(input, ofile='dep_parse.txt', pickled=True):
    if pickled:
        f = open(input, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(input)
    return stanford_parse(traind['orig'], ofile)
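A hedged call sketch (input path hypothetical): generate Stanford dependency parses for the 'orig' sentences of a pickled corpus and write them to ofile:

trees = create_parses_from_dict('lap_train.pkl', ofile='lap_dep_parse.txt')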
Example #9
 def write_available_plugins( self ):
     
     xml_fileset = glob.glob( 'plugins/*.xml' )
     
     for xml_file in xml_fileset:
         data = XMLParser.parseFile( xml_file )
         self.cursor.execute( 'INSERT INTO plugins VALUES (null, ?, ?, ?)', ( data['plugin']['name'], data['description'], data['main_module'] ) )
         
     self.connect.commit( )
Example #10
def main(filen:str,filen2:str,settings:dict) :
    re=XMLParser.loadXML(filen)
    if 'q' in settings :
        re=dataqc.qc(re)
    if os.path.exists(filen2) :
        os.remove(filen2)
    f=open(filen2,'w',encoding='utf8')
    json.dump(re,f)
    f.close()
Example #11
def K_fold_train_and_test(filename,
                          parse_file,
                          use_dep=False,
                          posit_lex_file='positive-words.txt',
                          nega_lex_file='negative-words.txt',
                          k=5,
                          pickled=False):
    """Does K-fold cross-validation on the given filename
    """
    global use_dep_parse
    if use_dep:
        print "using dependency parses"
        use_dep_parse = True
    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    n = len(traind['iob'])
    # default to the IOB lists so the per-example indexing below works without parses
    dep_parses = traind['iob']
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'],
                                                         parse_file,
                                                         dictionary=True,
                                                         iobs=True)
    #posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    #negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    kf = cross_validation.KFold(n, n_folds=k, indices=True)
    tot_p, tot_r, tot_f1 = 0, 0, 0
    for train, test in kf:
        print "next fold, split size: %d/%d" % (len(train), len(test))
        #print train
        train_set = []
        test_set = []
        train_parse = []
        test_parse = []
        for i in train:
            train_set.append(traind['iob'][i])
            train_parse.append(dep_parses[i])
        for i in test:
            test_set.append(traind['iob'][i])
            test_parse.append(dep_parses[i])
        chunker = ConsecutiveChunker(train_set, test_set, senti_dictionary,
                                     train_parse)
        guesses = chunker.evaluate([test_set, test_parse])
        #print test_set
        #print guesses
        r, p, f = semeval_util.compute_pr(test_set, guesses)
        tot_p += p
        tot_r += r
        tot_f1 += f
    print "ave Prec: %.2f, Rec: %.2f, F1: %.2f" % (tot_p / float(k), tot_r /
                                                   float(k), tot_f1 / float(k))
Example #12
def main():
  print "Models of computation simulator"
  if len(sys.argv) != 2:
    print "Usage:", sys.argv[0], "input-file"
    exit(-1)
  
  inputFile = sys.argv[1]
  print "Parsing input..."
  (input, output, processes) = XMLParser.parseXml(inputFile)
  scheduler = Scheduler.Scheduler(processes, input, output)
  scheduler.runModel()
  scheduler.outputResults()
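The simulator is command-line driven; a hedged invocation, assuming a hypothetical script and model file name:

# python simulator.py model.xml
# XMLParser.parseXml(inputFile) is expected to return the (input, output, processes) triple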
Example #13
def train_and_trial(trn_file, test_file, parse_file_train, parse_file_test, use_dep=False,
                    posit_lex_file='positive-words.txt', nega_lex_file='negative-words.txt', pickled=False):
    """ Train on the training file and test on the testing file
    """
    global use_dep_parse
    if use_dep:
        use_dep_parse = True
    if pickled:
        f = open(trn_file, 'rb')
        traind = cPickle.load(f)
        f.close()
        f = open(test_file, 'rb')
        testd = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(trn_file)
        testd = XMLParser.create_exs(test_file)
    #posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    #negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    dep_parses = []
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'], parse_file_train, dictionary=True, iobs=True)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    chunker = ConsecutiveChunker(traind['iob'], testd['iob'], senti_dictionary, dep_parses)
    print "done training on %d examples" % len(traind['iob'])
    '''
    f = open('learned.pkl','wb')
    cPickle.dump(chunker,f)
    f.close()
    '''
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(testd['iob'], parse_file_test, dictionary=True, iobs=True)

    guessed_iobs = chunker.evaluate([testd['iob'], dep_parses])
    ###semeval_util.compute_pr(testd['iob'], guessed_iobs)
    return guessed_iobs
Example #14
def k_fold(filename, parse_filename, k=5, pickled=True, use_dep=False):
    global use_dep_parse
    if use_dep:
        use_dep_parse = True

    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    n = len(traind['iob'])
    posi_words = semeval_util.get_liu_lexicon('positive-words.txt')
    negi_words = semeval_util.get_liu_lexicon('negative-words.txt')
    senti_dictionary = semeval_util.get_mpqa_lexicon()

    full_senti_label = [senti_classify(sentence, posi_words, negi_words) for sentence in traind['orig']]
    dep_parses = [[]] * n
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'], parse_filename, dictionary=True, iobs=True)
    kf = cross_validation.KFold(n, n_folds=k, indices=True)
    tot_acc = 0.
    for train, test in kf:
        print "next fold, split size: %d/%d" %(len(train), len(test))
        #print train
        train_set = []
        train_sentis = []
        train_parse = []

        test_set = []
        test_sentis = []
        test_parse = []
        for i in train:
            train_set.append((traind['iob'][i], traind['polarity'][i]))
            train_sentis.append(full_senti_label[i])
            train_parse.append(dep_parses[i])
        for i in test:
            test_set.append((traind['iob'][i], traind['polarity'][i]))
            test_sentis.append((full_senti_label[i]))
            test_parse.append(dep_parses[i])
        chunker = ConsecutiveChunkTagger(train_set, senti_dictionary, train_sentis, train_parse)
        acc = chunker.evaluate(zip(test_set, test_sentis, test_parse))
        print "acc:", acc
        tot_acc += acc
    print "average acc:", tot_acc/k
Example #15
def K_fold_err_analysis(filename, parse_file, k=5, p=0.15, pickled=False):
    """Does K-fold cross-validation on the given filename, but only p percent of it
    """

    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    #####
    n = int(len(traind['iob']) * p)
    dep_parses = traind['iob']
    #if use_dep_parse:
    #    dep_parses = add_dep_parse_features(traind['iob'], parse_file, dictionary=True, iobs=True)
    senti_dictionary = get_mpqa_lexicon()
    kf = cross_validation.KFold(n, n_folds=k, indices=True)
    tot_p, tot_r, tot_f1 = 0, 0, 0
    for train, test in kf:
        print "next fold, split size: %d/%d" % (len(train), len(test))
        #print train
        train_set = []
        test_set = []
        train_parse = []
        test_parse = []
        for i in train:
            train_set.append(traind['iob'][i])
            train_parse.append(dep_parses[i])
        for i in test:
            test_set.append(traind['iob'][i])
            test_parse.append(dep_parses[i])
        chunker = semevalTask4.ConsecutiveChunker(train_set, test_set,
                                                  senti_dictionary,
                                                  train_parse)
        guesses = chunker.evaluate([test_set, test_parse])
        r, p, f = compute_pr(test_set, guesses)
        tot_p += p
        tot_r += r
        tot_f1 += f
        #JUST ONE SPLIT FOR NOW!!!
        return
    print "ave Prec: %.2f, Rec: %.2f, F1: %.2f" % (tot_p / float(k), tot_r /
                                                   float(k), tot_f1 / float(k))
Example #16
def train_and_test(filename, parse_file,
                   posit_lex_file='positive-words.txt', nega_lex_file='negative-words.txt',
                   pickled=False, use_dep=False):
    """Creates an 80/20 split of the examples in filename,
    trains the sentiment classifier on 80%, and evaluates the learned classifier on 20%.
    """
    global use_dep_parse
    if use_dep:
        use_dep_parse = True
    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    n = len(traind['iob'])
    split_size = int(n * 0.8)
    train = zip(traind['iob'][:split_size], traind['polarity'][:split_size])
    test = zip(traind['iob'][split_size:], traind['polarity'][split_size:])
    posi_words = semeval_util.get_liu_lexicon(posit_lex_file)
    negi_words = semeval_util.get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()

    full_senti_label = [senti_classify(sentence, posi_words, negi_words) for sentence in traind['orig']]

    dep_parses = []
    if use_dep_parse:
        dep_parses = semeval_util.add_dep_parse_features(traind['iob'], parse_file, dictionary=True, iobs=True)
        print "first dep_parse:", dep_parses[0]
        print "first train ex:", train[0]
        print "size parses all:", len(dep_parses), "vs train:", len(dep_parses[:split_size])

    chunker = ConsecutiveChunkTagger(train, senti_dictionary, full_senti_label, dep_parses[:split_size])
    print "done training"

    if use_dep_parse:
        dep_parses = dep_parses[split_size:]
        print "first test dep parse:", dep_parses[0]
        print "first test ex:", test[0]
    else:
        # artifact of using zip: even without parses, every test example needs a (possibly empty) entry
        dep_parses = [[]] * len(test)
    print chunker.evaluate(zip(test, full_senti_label[split_size:], dep_parses))
Example #17
def train_and_test(filename, posit_lex_file='positive-words.txt', nega_lex_file='negative-words.txt', pickled=False):
    """Creates an 80/20 split of the examples in filename,
    trains the chunker on 80%, and evaluates the learned chunker on 20%.
    """
    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    n = len(traind['iob'])
    split_size = int(n * 0.8)
    train = traind['iob'][:split_size]
    test = traind['iob'][split_size:]
    #posi_words = get_liu_lexicon(posit_lex_file)
    #negi_words = get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    chunker = ConsecutiveChunker()
    chunker.train(train, senti_dictionary)
    guessed_iobs = chunker.evaluate(test)
    semeval_util.compute_pr(test, guessed_iobs)
Example #18
    def work(fQ, eQ):
        xmlp = XMLParser.XMLParser()
        datp = DATParser.DATParser()

        while not fQ.empty():
            fp = fQ.get()
            if fp[-4:] == '.xml':
                parser = xmlp
            else:
                parser = datp
            logging.info('Parsing %s', os.path.basename(fp))
            print(os.path.basename(fp) + ' parsing')
            try:
                dpatns, badpatns = parser.parseFile(fp)
                print(os.path.basename(fp) + " parsed")
                # The len fun below works for both dicts (badpatns) and arrays (dpatns)
                logging.info("%d (%d bad) found in %s", len(dpatns),
                             len(badpatns), os.path.basename(fp))

                # DB: This next line, I think, is Andy loading the parsed good patents into the
                # multicore queue. I just have each thread insert the patents straight into the db.
                # dQ.put(dpatns)

                # DB: the below line inserts all of the good patents into
                # the database collection 'patns'. Assumes dpatns is of type array of dicts.
                dbase['patns'].insert(dpatns)
                print(os.path.basename(fp) + " in DB")

                # parser.patns = dict()	# toss old patns
                parser.patns = []
                # Could deal with bad patns instead of tossing them, but probably not worth it.
                # DB: put them into a mongo instance?
                parser.badpatns = {}
            except:
                logging.error("Error parsing %s",
                              os.path.basename(fp),
                              exc_info=True)
                eQ.put(fp)
            fQ.task_done()
        logging.info("Worker finished.")
Example #19
 def work(fQ, dQ, eQ):
     xmlp = XMLParser.XMLParser()
     datp = DATParser.DATParser()
     
     while not fQ.empty():
         fp = fQ.get()
         if fp[-4:] == '.xml':
             parser = xmlp
         else:
             parser = datp
         logging.info("Parsing %s", os.path.basename(fp))
         try:
             dpatns,badpatns = parser.parseFile(fp)
             logging.info("%d (%d bad) found in %s", len(dpatns), len(badpatns), os.path.basename(fp))
             dQ.put(dpatns)
             parser.patns = dict()   # toss old patns
             # BUGBUG tossing bad patns instead of dealing with them
             parser.badpatns = dict()
         except:
             logging.error("Error parsing %s", os.path.basename(fp), exc_info=True)
             eQ.put(fp)
         fQ.task_done()
     #dictQ.close()
     logging.info("Worker finished.")
Example #20
def train_and_test(filename,
                   posit_lex_file='positive-words.txt',
                   nega_lex_file='negative-words.txt',
                   pickled=False):
    """Creates an 80/20 split of the examples in filename,
    trains the chunker on 80%, and evaluates the learned chunker on 20%.
    """
    if pickled:
        f = open(filename, 'rb')
        traind = cPickle.load(f)
        f.close()
    else:
        traind = XMLParser.create_exs(filename)
    n = len(traind['iob'])
    split_size = int(n * 0.8)
    train = traind['iob'][:split_size]
    test = traind['iob'][split_size:]
    #posi_words = get_liu_lexicon(posit_lex_file)
    #negi_words = get_liu_lexicon(nega_lex_file)
    senti_dictionary = semeval_util.get_mpqa_lexicon()
    chunker = ConsecutiveChunker()
    chunker.train(train, senti_dictionary)
    guessed_iobs = chunker.evaluate(test)
    semeval_util.compute_pr(test, guessed_iobs)
Example #21
parses_tests = ['laptops_test_phaseA-parse.txt','rest_test_phaseA-parse.txt','lap-trial-parse.txt']
results_files = ['lap_phaseA.xml','rest_phaseA.xml','lap-trial_phaseA.xml']

def get_data(dataset_name):
    idx = names.index(dataset_name)
    return pickle_trains[idx], pickle_tests[idx], parses_trains[idx], parses_tests[idx], results_files[idx]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("task_name", help="must be either lap or rest or dummy", type=str)
    #later if time
    parser.add_argument("-p", help="Specify that train_file is an already learned clf",type=bool, default=False)
    parser.add_argument("-dep", help="If true, use dependency parse features", type=bool, default=False)
    args = parser.parse_args()

    train_file, test_file, parse_train_file, parse_test_file, out_xml_file = get_data(args.task_name)

    results = semevalTask4.train_and_trial(train_file, test_file, parse_train_file, parse_test_file,
                                           use_dep=args.dep, pickled=True)
    #create results file
    f = open(test_file, 'rb')
    testd = cPickle.load(f)
    f.close()
    XMLParser.create_xml(testd['orig'], results, testd['id'], testd['idx'], out_xml_file)
Example #22
import IPPInterpret
import XMLParser
import sys
import argparse

argparser = argparse.ArgumentParser()
argparser.add_argument('--source', '-s', type=str, dest="source")
args = argparser.parse_args()
if args.source is not None:
    file = open(args.source, "r")
else:
    file = sys.stdin

parser = XMLParser.XMLParser(file)
program = parser.parse_document()
interpret = IPPInterpret.IPPInterpret(program)
interpret.interpret_program()
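Hedged invocations for this front end (script name hypothetical): the program XML can be passed via --source or piped on stdin:

# python interpret.py --source program.xml
# cat program.xml | python interpret.py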
Example #23
from XMLParser import *
from DataBaseFacade import *
from ConfigParser import *

# Load input and database file information
parameters = sys.argv[1:]
if len(parameters) < 2:
	print "Error! You must pass two parameters: the first should be the input file and the second a database info (host, user, password, port) JSON file"
	sys.exit(1)
for i in range(len(parameters)):
	if i == 0:	# input
		fileInput = parameters[i]
	if i == 1:	# database file information
		config = ConfigParser(parameters[i])

db = DataBaseFacade(name = DataBaseFacade.MYSQL,host = config.host(), user = config.user(), password = config.password(), port = config.port())

x = XMLParser(fileInput)
x.parse()
x.generate()
tablesInfo = x.getTablesInfo()
tablesData = x.getTablesData()

if db.createDatabase(x.getDatabaseName()):
	print "++++ Database Created Successfully ++++"
if db.createTables(tablesInfo):
	print "++++ Tables Created Successfully ++++"
if db.insertData(tablesData):
	print "++++ Data Inserted Successfully ++++"
db.closeConnection()
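A hedged invocation sketch with hypothetical file names: the first argument is the input XML, the second the JSON file with the database connection info (host, user, password, port):

# python xml2db.py input.xml dbconfig.json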
Example #24
def main():
    src = Source(rate=16000, channels=1, frames_size=21000)
    ch1 = ChannelPicker(channels=1, pick=1)
    # doa = DOA(rate=16000, chunks=3)

    model_path = get_model_path()

    config = Decoder.default_config()
    # config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    # config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))

    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_string('-lm', '2823.lm')
    config.set_string('-verbose', 'False')
    config.set_string('-dict', '2823.dic')
    # config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
    config.set_string('-kws', 'keyphrase.list')
    config.set_string('-logfn', '/dev/null')
    # config.set_string('-keyphrase', 'hey there')
    # config.set_float('-kws_threshold', 1e-30)
    sphinx = Sphinx(config)

    src.link(ch1)
    # src.link(doa)
    ch1.link(sphinx)

    graph = XMLParser(graph_file="basic.xml", debug=True).parse()
    arduino = serial.Serial('/dev/ttyACM0', 57600)
    arduino.timeout = 0.1


    # Check current state
    print("Current State: {}".format(graph.get_current_state().name))


    def on_graph_state_change():
        print("onStateChange()")
        # Runs through state responses
        print("\tNew Current State: {}".format(graph.state))
        print("\tExecuting responses for nextState...")

        if len(graph.state.get_responses()) > 0:
            print('Responses: {}'.format(len(graph.state.get_responses())))
            for response in graph.state.get_responses():
                print('\tRunning Response {}'.format(response))
                # do response action whether it has to do with moving motors, turning led, etc

                if response.typ == ResponseType.GO_TO_STATE:
                    graph.set_current_state(response.value)
                elif response.typ == ResponseType.LED:
                    if pixels is not None:
                        if response.value == 'listening':
                            pixels.think()
                        elif response.value == 'off':
                            pixels.off()
                        elif response.value == 'hello':
                            pixels.speak()
                        elif response.value == 'following':
                            pixels.following()
                        elif response.value == 'doa':
                            if mic is not None:
                                pixels.wakeup(mic.direction)
                        else:
                            print("Unknown LED value: {} was found.".format(response.value))
                elif response.typ == ResponseType.MOTOR_MOVE:
                    if response.value == 'forward':
                        arduino.write("d:f;")
                    elif response.value == 'stop':
                        arduino.write("d:s;")
                elif response.typ == ResponseType.CAMERA_MOVE:
                    if response.value == 'doa':
                        if mic is not None:
                            voice_direction = mic.direction
                            print "voice from " + str(voice_direction)
                            arduino_command = "m:" + str(voice_direction) + ";"
                            if voice_direction < 180:
                                #voice is coming from behind
                                voice_direction = (voice_direction + 180) % 360
                            else:
                                #voice is coming from in front
                                voice_direction = 90

                        arduino_command = arduino_command + "c:" + str(voice_direction) + ",120;"
                        arduino.write(arduino_command)
                        last_time_motor_moved = simpletime.time()
                        print("@done@")
                elif response.typ == ResponseType.VOICE_RESPONSE:
                    text = response.value.replace(' ', '_')

                    #Calls the Espeak TTS Engine to read aloud a Text
                    call([cmd_beg+cmd_out+text+cmd_end], shell=True)
                else:
                    print("Unused response type: {}.".format(response.typ))
Example #25
    def start(self, inputFilePath, outputPath, fileName, config):

        currentDir = readConfig.getCurrentScriptPath()

        apkFilesPath = os.path.join(currentDir, config["apkFilesPath"])
        tempPath = os.path.join(currentDir, "tmp_" + fileName)
        androidManifest = os.path.join(tempPath, "AndroidManifest.xml")
        utilsPath = os.path.join(currentDir, config["utilsPath"])
        filesPath = os.path.join(currentDir, config["filesPath"])
        apkPath = os.path.join(apkFilesPath, fileName)

        print("\n APKPath: %s\n TmpPath: %s\n " % (apkPath, tempPath))

        if not (os.path.exists(apkFilesPath)):
            os.mkdir(apkFilesPath)

        # Check whether the APK has already been protected
        zipFile = os.path.join(utilsPath, '7za.exe')

        if not os.path.exists(zipFile):
            zipFile = '7za'

        assertLibPath = os.path.join(apkFilesPath, fileName[:-4])
        self.delete(assertLibPath)
        if self.sysstr == "Linux":
            cmd = 'unzip "%s" "lib/*" -d "%s"' % (apkPath, assertLibPath)
        else:
            cmd = ' %s x -aoa %s "assets" "lib"  -o%s ' % (zipFile, apkPath,
                                                           assertLibPath)
        subprocess.call(cmd, shell=True)
        #print ("\n command is %s\n" %cmd)
        #print ("\n Search path is: %s \n" %assertLibPath)

        for root, dirs, files in os.walk(assertLibPath):
            for temp in files:
                #print ("\n Search path is: %s \n" %temp)
                if temp.find(globalValues.IsBangBang) >= 0 or temp.find(
                        globalValues.IsAjiami) >= 0 or temp.find(
                            globalValues.IsProtect) >= 0:
                    globalValues.returnValue = 1
                    self.delete(assertLibPath)
                    print("This APK has been reinforced!!!")
                    sys.exit(1)

        self.delete(assertLibPath)

        # Unpack the APK
        packunpack.unpackApk(
            os.path.join(utilsPath, globalValues.APKToolsName), apkPath,
            tempPath)

        # Read values/String.xml and AndroidManifest.xml
        xmlparser = XMLParser.XMLParser(androidManifest, config)

        if len(globalValues.args) >= 2:
            # Save args[1] into the dictionary
            if self.sysstr == "Linux":
                path_index = globalValues.args[1].rfind('/') + 1
            else:
                path_index = globalValues.args[1].rfind('\\') + 1
            out_path = globalValues.args[1][0:path_index]
            if (out_path[0] == '.'):
                out_path = sys.path[0] + out_path[1:]
            print("out_path: %s" % out_path)
        else:
            out_path = outputPath

        # Read the string file
        xmlparser.read_string_XML(tempPath)
        reader = xmlparser.get_XML_Result(out_path)

        # Invoke the __init__ constructor
        file_opt = fileOpt.fileOpt(apkPath, filesPath, tempPath, outputPath)
        file_opt.set_reader(reader)
        file_opt.set_xmlparser(xmlparser)

        #file_opt.copySmaliFile()

        #file_opt.copyLib()
        # newly added
        #file_opt.copyAssets()
        #file_opt.changeLauncher(androidManifest)
        # Add code to onCreate of the main activity
        #file_opt.changeActivity()
        file_opt.addDialog()

        # Repackage
        outPath = os.path.join(outputPath, fileName)
        file_opt.finish(outPath)
        print('App repackaging finished!')
Example #26
        senti_dictionary = semeval_util.get_mpqa_lexicon()
        negate_wds = semeval_util.negateWords
        results = []
        for iob in traind['iob']:
            polarities = semeval_util.create_sentiment_sequence(iob, senti_dictionary, negate_wds)
            translated = []
            for p, n in polarities:
                if p > n:
                    translated.append('positive')
                elif n > p:
                    translated.append('negative')
                else:
                    translated.append('neutral')
            results.append(translated)
        semeval_util.compute_sent_acc(traind['polarity'], results)
        XMLParser.create_xml(traind['orig'], traind['iob'], traind['id'], traind['idx'], sentiments=results,
                             outfile='baseline.xml')
        sys.exit()
    else:
        results = task4_stask2.train_and_trial(train_file, test_file)

    #create results file
    f = open(test_file, 'rb')
    testd = cPickle.load(f)
    f.close()
    XMLParser.create_xml(testd['orig'], testd['iob'], testd['id'], testd['idx'], sentiments=results, outfile=out_xml_file)
Example #27
def main():
    model_path = get_model_path()
    config = Decoder.default_config()
    # config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    # config.set_string('-lm', os.path.join(model_path, 'en-us.lm.bin'))
    # config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_string('-lm', '2823.lm')
    config.set_string('-verbose', 'False')
    config.set_string('-dict', '2823.dic')
    config.set_string('-kws', 'keyphrase.list')
    config.set_string('-logfn', '/dev/null')

    graph = XMLParser(graph_file="basic.xml", debug=True).parse()
    arduino = serial.Serial('/dev/ttyACM0', 57600)
    arduino.timeout = 0.1

    # Check current state
    print("Current State: {}".format(graph.get_current_state().name))

    decoder = Decoder(config)

    # while True:
    #     simpletime.sleep(0.1)
    #     try:
    #         # arduino_says = arduino.readline()
    #         # if (len(arduino_says) > 0):
    #         #     print('\nRaw: ' + arduino_says)
    #         # arduino_says = arduino_says.replace('\r', '')
    #         # arduino_says = arduino_says.replace('\n', '')
    #         # if "m:done;" in arduino_says or "e:ready;" in arduino_says:
    #         #     print('Live!')
    #         #     local.arduino_busy = False
    #         # sys.stdout.write(".")
    #         # sys.stdout.flush()
    #     except KeyboardInterrupt:
    #         break

    #src.recursive_stop()

    def on_graph_state_change():
        print("onStateChange()")
        # Runs through state responses
        print("\tNew Current State: {}".format(graph.state))
        print("\tExecuting responses for nextState...")

        if len(graph.state.get_responses()) > 0:
            print('Responses: {}'.format(len(graph.state.get_responses())))
            for response in graph.state.get_responses():
                print('\tRunning Response {}'.format(response))
                # do response action whether it has to do with moving motors, turning led, etc

                if response.typ == ResponseType.GO_TO_STATE:
                    graph.set_current_state(response.value)
                elif response.typ == ResponseType.LED:
                    if pixels is not None:
                        if response.value == 'listening':
                            pixels.think()
                        elif response.value == 'off':
                            pixels.off()
                        elif response.value == 'hello':
                            pixels.speak()
                        elif response.value == 'following':
                            pixels.spin()
                        elif response.value == 'doa':
                            if mic is not None:
                                pixels.wakeup(mic.direction)
                        else:
                            print("Unknown LED value: {} was found.".format(
                                response.value))
                elif response.typ == ResponseType.MOTOR_MOVE:
                    if response.value == 'forward':
                        arduino.write("d:f;")
                    elif response.value == 'stop':
                        arduino.write("d:s;")
                elif response.typ == ResponseType.CAMERA_MOVE:
                    if response.value == 'doa':
                        if mic is not None:
                            voice_direction = mic.direction
                            print("voice from " + str(voice_direction))
                            arduino_command = "m:" + str(voice_direction) + ";"
                            if voice_direction < 180:
                                #voice is coming from behind
                                voice_direction = (voice_direction + 180) % 360
                            else:
                                #voice is coming from in front
                                voice_direction = 90

                        arduino_command = arduino_command + "c:" + str(
                            voice_direction) + ",120;"
                        arduino.write(arduino_command)
                        last_time_motor_moved = simpletime.time()
                        print("@done@")
                elif response.typ == ResponseType.VOICE_RESPONSE:
                    text = response.value.replace(' ', '_')

                    #Calls the Espeak TTS Engine to read aloud a Text
                    call([cmd_beg + cmd_out + text + cmd_end], shell=True)
                else:
                    print("Unused response type: {}.".format(response.typ))
        else:
            print('\tResponding with nothing')

    class local:
        # arduino_busy = True
        voices = {}
        position = None

    def on_detected(word):
        start = datetime.now()
        if simpletime.time() - last_time_motor_moved > 0.4:
            print("on_detected with word = ")
            graph.apply_action(ActionType.VOICE_COMMAND, word.hypstr)
        else:
            print("on_detected ignored - motor movement")
        print(datetime.now() - start)
        # if 'odd bot' in word.hypstr and 'follow me' in word.hypstr:
        #     pixels.think()
        # else:
        #     print(word.hypstr)
        #     #     print("Arduino is busy. Doing nothing")
        #     #     return
        #     local.position = doa.get_direction()
        #     pixels.wakeup(local.position)
        #     print(datetime.now() - start)
        #     # local.arduino_busy = True
        #     print('\nDirection {}'.format(local.position) + " Sent: " + str(local.position))

        # arduino.write("m:" + str(k) + ";c:" + str(randint(30, 150)) + "," + str(randint(30,150)) + ";")

    graph.set_on_state_change(on_graph_state_change)
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=16000,
                    input=True,
                    frames_per_buffer=2048)
    stream.start_stream()

    in_speech_bf = False
    decoder.start_utt()
    while True:
        try:
            buf = stream.read(2048, exception_on_overflow=False)
            if buf:
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_bf:
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        # print 'Result:', decoder.hyp().hypstr
                        on_detected(decoder.hyp())
                        decoder.start_utt()
            else:
                break
        except KeyboardInterrupt:
            break
    decoder.end_utt()
Example #28
import XMLParser as XP

def import_molecule(name):
  print "Importing %s..." % name,
  g = XP.parse_file("./molecule_data/" + name)
  print "done."
  #strip .xml from the name, add name * graph * iso_map to list
  return (name[0:-4],g,{})
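A hedged batch sketch over the ./molecule_data directory this function already assumes, collecting the (name, graph, iso_map) triples:

import os

molecules = [import_molecule(f) for f in os.listdir('./molecule_data')
             if f.endswith('.xml')]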
Example #29
def main(filen:str,filen2:str,settings:dict) :
    try :
        re=XMLParser.loadXML(filen)
    except:
        f=open(filen,'r',encoding='utf8')
        re=json.load(f)
        f.close()
    if 'q' in settings :
        re=dataqc.qc(re)
    if os.path.exists(filen2) :
        removedir(filen2)
    os.mkdir(filen2)
    def writexls(fn:str,settings:dict,re:list):
        w=xlwt.Workbook(encoding='utf8')
        t:xlwt.Worksheet=w.add_sheet('每首歌听歌时间')
        t.set_panes_frozen('1')
        t.set_vert_split_pos(1)
        t.set_horz_split_pos(1)
        ti=['排名','播放时间(s)','播放时间','占比','播放次数','标题','艺术家','专辑','轨道艺术家','专辑艺术家','年份','光盘编号','轨道编号','编码','编码扩展','扩展名','比特率','采样频率','声道数','长度','长度(s)','上次播放']
        ti2=['playcount','title','artist','album','trackartist','albumartist','date','discnumber','tracknumber','codec','codecprofile','ext','bitrate','samplerate','channels','length','lengthseconds','lastplayed']
        ti3=[0.35,0.9,1,0.7,0.7,2.8,2,3.6,1,2,0.4,0.7,0.7,0.5,0.7,0.5,0.5,0.7,0.5,0.5,0.7,1.5]  # column widths
        if not 'p' in settings :
            ti=ti[:3]+ti[4:]
            ti3=ti3[:3]+ti3[4:]
        k=0
        for i in ti:
            t.write(0,k,i)
            rr:xlwt.Column=t.col(k)
            rr.width=int(rr.width*ti3[k])
            k=k+1
        if 'p' in settings:
            s=xlwt.XFStyle()
            s.num_format_str='0.00%'
        r=re
        sort(r,'playtime')
        k=1
        tt=0
        tk=1
        for i in r :
            if i['playtime']!=tt :
                tt=i['playtime']
                tk=k
            t.write(k,0,tk)
            t.write(k,1,i['playtime'])
            t.write(k,2,getlengthstr(i['playtime']))
            n=3
            if 'p' in settings :
                t.write(k,3,xlwt.Formula('B%s/SUM(B2:B%s)'%(k+1,len(r)+1)),s)
                n=4
            for j in ti2 :
                if j in i :
                    t.write(k,n,i[j])
                n=n+1
            k=k+1
        t:xlwt.Worksheet=w.add_sheet('艺术家听歌时间')
        t.set_panes_frozen('1')
        t.set_vert_split_pos(1)
        t.set_horz_split_pos(1)
        ti=['排名','播放时间(s)','播放时间','占比','艺术家']
        ti3=[0.35,0.9,1,0.7,2]
        if not 'p' in settings :
            ti=ti[:3]+ti[4:]
            ti3=ti3[:3]+ti3[4:]
        k=0
        for i in ti :
            t.write(0,k,i)
            rr:xlwt.Column=t.col(k)
            rr.width=int(rr.width*ti3[k])
            k=k+1
        r=getartistplaytimelist(re)
        sort(r,'playtime')
        k=1
        tt=0
        tk=1
        for i in r :
            if i['playtime']!=tt :
                tt=i['playtime']
                tk=k
            t.write(k,0,tk)
            t.write(k,1,i['playtime'])
            t.write(k,2,getlengthstr(i['playtime']))
            if 'p' in settings :
                t.write(k,3,xlwt.Formula('B%s/SUM(B2:B%s)'%(k+1,len(r)+1)),s)
                t.write(k,4,i['artist'])
            else :
                t.write(k,3,i['artist'])
            k=k+1
        t:xlwt.Worksheet=w.add_sheet('专辑听歌时间')
        t.set_panes_frozen('1')
        t.set_vert_split_pos(1)
        t.set_horz_split_pos(1)
        ti=['排名','播放时间(s)','播放时间','占比','专辑','专辑艺术家']
        ti3=[0.35,0.9,1,0.7,3.6,2]
        if not 'p' in settings :
            ti=ti[:3]+ti[4:]
            ti3=ti3[:3]+ti3[4:]
        k=0
        for i in ti :
            t.write(0,k,i)
            rr:xlwt.Column=t.col(k)
            rr.width=int(rr.width*ti3[k])
            k=k+1
        r=getalbumplaytimelist(re)
        sort(r,'playtime')
        k=1
        tt=0
        tk=1
        for i in r :
            if i['playtime']!=tt :
                tt=i['playtime']
                tk=k
            t.write(k,0,tk)
            t.write(k,1,i['playtime'])
            t.write(k,2,getlengthstr(i['playtime']))
            if 'p' in settings :
                t.write(k,3,xlwt.Formula('B%s/SUM(B2:B%s)'%(k+1,len(r)+1)),s)
                t.write(k,4,i['album'])
                t.write(k,5,i['albumartist'])
            else :
                t.write(k,3,i['album'])
                t.write(k,4,i['albumartist'])
            k=k+1
        t:xlwt.Worksheet=w.add_sheet('专辑-艺术家听歌时间')
        t.set_panes_frozen('1')
        t.set_vert_split_pos(1)
        t.set_horz_split_pos(1)
        ti=['排名','播放时间(s)','播放时间','占比','艺术家','专辑','专辑艺术家']
        ti3=[0.35,0.9,1,0.7,2,3.6,2]
        if not 'p' in settings :
            ti=ti[:3]+ti[4:]
            ti3=ti3[:3]+ti3[4:]
        k=0
        for i in ti :
            t.write(0,k,i)
            rr:xlwt.Column=t.col(k)
            rr.width=int(rr.width*ti3[k])
            k=k+1
        r=getalbumartistplaytimelist(re)
        sort(r,'playtime')
        k=1
        tt=0
        tk=1
        for i in r :
            if i['playtime']!=tt :
                tt=i['playtime']
                tk=k
            t.write(k,0,tk)
            t.write(k,1,i['playtime'])
            t.write(k,2,getlengthstr(i['playtime']))
            if 'p' in settings :
                t.write(k,3,xlwt.Formula('B%s/SUM(B2:B%s)'%(k+1,len(r)+1)),s)
                t.write(k,4,i['artist'])
                t.write(k,5,i['album'])
                t.write(k,6,i['albumartist'])
            else :
                t.write(k,3,i['artist'])
                t.write(k,4,i['album'])
                t.write(k,5,i['albumartist'])
            k=k+1
        if 'hid' in settings :
            r=geteverydayplaytimelist(re,True)
        else :
            r=geteverydayplaytimelist(re)
        if 'hp' in settings :
            sort(r['r'],'playtime')
        t:xlwt.Worksheet=w.add_sheet('每日听歌时间')
        t.set_panes_frozen('1')
        t.set_vert_split_pos(1)
        t.set_horz_split_pos(1)
        ti=['序号','日期','播放时间(s)','播放时间','占比']
        ti3=[0.35,1.5,0.9,1,0.7]
        if not 'p' in settings :
            ti=ti[:-1]
            ti3=ti3[:-1]
        k=0
        for i in ti :
            t.write(0,k,i)
            rr:xlwt.Column=t.col(k)
            rr.width=int(rr.width*ti3[k])
            k=k+1
        k=1
        for i in r['r'] :
            t.write(k,0,k)
            t.write(k,1,i['timestr'])
            t.write(k,2,i['playtime'])
            t.write(k,3,getlengthstr(i['playtime']))
            if 'p' in settings :
                t.write(k,4,xlwt.Formula('C%s/SUM(C2:C%s)'%(k+1,len(r['r'])+1)),s)
            k=k+1
        if 'hid' in settings :
            if 'hp' in settings :
                sort(r['r'],'time',False)
            t:xlwt.Worksheet=w.add_sheet('每日听歌时间(详细记录)')
            t.set_panes_frozen('1')
            t.set_vert_split_pos(1)
            t.set_horz_split_pos(1)
            ti=['序号','播放时间','播放次数','标题','艺术家','专辑','轨道艺术家','专辑艺术家','年份','光盘编号','轨道编号','编码','编码扩展','扩展名','比特率','采样频率','声道数','长度','长度(s)']
            ti2=['playcount','title','artist','album','trackartist','albumartist','date','discnumber','tracknumber','codec','codecprofile','ext','bitrate','samplerate','channels','length','lengthseconds']
            ti3=[0.5,1.5,0.7,2.8,2,3.6,1,2,0.4,0.7,0.7,0.5,0.7,0.5,0.5,0.7,0.5,0.5,0.7]
            k=0
            for i in ti :
                t.write(0,k,i)
                rr:xlwt.Column=t.col(k)
                rr.width=int(rr.width*ti3[k])
                k=k+1
            k=1
            for i in r['r']:
                for j in r['d'][i['timestr']]:
                    t.write(k,0,k)
                    t.write(k,1,j['ts'])
                    n=2
                    for m in ti2:
                        if m in re[j['i']] :
                            t.write(k,n,re[j['i']][m])
                        n=n+1
                    k=k+1
        t:xlwt.Worksheet=w.add_sheet('发行年份听歌时间')
        t.set_panes_frozen('1')
        t.set_vert_split_pos(1)
        t.set_horz_split_pos(1)
        ti=['序号','年份','播放时间(s)','播放时间','占比']
        ti3=[0.35,0.4,0.9,1,0.7]
        if not 'p' in settings :
            ti=ti[:-1]
            ti3=ti3[:-1]
        k=0
        for i in ti :
            t.write(0,k,i)
            rr:xlwt.Column=t.col(k)
            rr.width=int(rr.width*ti3[k])
            k=k+1
        r=getdateplaytimelist(re)
        if 'dp' in settings :
            sort(r,'playtime')
        else :
            sort(r,'date',False)
        k=1
        for i in r :
            t.write(k,0,k)
            t.write(k,1,i['date'])
            t.write(k,2,i['playtime'])
            t.write(k,3,getlengthstr(i['playtime']))
            if 'p' in settings :
                t.write(k,4,xlwt.Formula('C%s/SUM(C2:C%s)'%(k+1,len(r)+1)),s)
            k=k+1
        w.save(fn)
    getlength(re)
    if 'a' in settings:
        writexls("%s\\all.xls"%(filen2),settings,re)
    if 'y' in settings and 'm' in settings and settings['y']=='all' and settings['m']=='all' :
        temp=autogetyearormonth(re,True,True)
        settings['y']=temp['y']
        settings['m']=temp['m']
    elif 'y' in settings and settings['y']=='all' :
        temp=autogetyearormonth(re)
        settings['y']=temp['y']
    elif 'm' in settings and settings['m']=='all' :
        temp=autogetyearormonth(re,False,True)
        settings['m']=temp['m']
    elif 'y' in settings :
        sorttimestruct(settings['y'],False)
    elif 'm' in settings :
        sorttimestruct(settings['m'],False)
    if 'y' in settings :
        for i in settings['y'] :
            writexls('%s\\%s.xls'%(filen2,time.strftime('%Y',i)),settings,gettimelist(re,i))
    if 'm' in settings :
        for i in settings['m'] :
            writexls('%s\\%s.xls'%(filen2,time.strftime('%Y%m',i)),settings,gettimelist(re,i,False,True))
Example #30
def main(filen: str, filen2: str, settings: dict):
    try:
        re = XMLParser.loadXML(filen)
    except:
        f = open(filen, 'r', encoding='utf8')
        re = json.load(f)
        f.close()
    if 'q' in settings:
        re = dataqc.qc(re)
    if os.path.exists(filen2):
        os.remove(filen2)
    w = xlwt.Workbook()
    a: xlwt.Worksheet = w.add_sheet('原数据')
    ti2 = [
        '序号', '标题', '艺术家', '轨道艺术家', '专辑', '专辑艺术家', '年份', '光盘编号', '轨道编号', '编码',
        '编码扩展', '扩展名', '比特率', '采样频率', '声道数', '长度', '长度(s)', '播放次数', '上次播放',
        '播放记录'
    ]
    t = [
        'id', 'title', 'artist', 'trackartist', 'album', 'albumartist', 'date',
        'discnumber', 'tracknumber', 'codec', 'codecprofile', 'ext', 'bitrate',
        'samplerate', 'channels', 'length', 'lengthseconds', 'playcount',
        'lastplayed', 'playedtimes'
    ]
    ti = [
        0.35, 2.8, 2, 1, 3.6, 2, 0.4, 0.7, 0.7, 0.5, 0.7, 0.5, 0.5, 0.7, 0.5,
        0.5, 0.7, 0.7, 1.5, 1
    ]  # column widths
    if 'h' in settings:
        t2 = ['序号', '播放时间']
        ti3 = [0.35, 1.5]
        b: xlwt.Worksheet = w.add_sheet('历史记录')
        j = 0
        for i in t2:
            b.write(0, j, i)
            r: xlwt.Column = b.col(j)
            r.width = int(r.width * ti3[j])
            j = j + 1
        t = t[:-1]
        ti2 = ti2[:-1]
        k2 = 1
    j = 0
    for i in ti2:
        a.write(0, j, i)
        r: xlwt.Column = a.col(j)
        r.width = int(r.width * ti[j])
        j = j + 1
    j = 1
    if 'h' in settings:
        t.append('playedtimes')
    for i in re:
        a.write(j, 0, j)
        k = 1
        for ii in t[1:]:
            if ii in i:
                if 'h' in settings and ii == 'playedtimes':
                    for iii in i[ii]:
                        b.write(k2, 0, j)
                        b.write(k2, 1, iii)
                        k2 = k2 + 1
                else:
                    a.write(j, k, i[ii])
            k = k + 1
        j = j + 1
    w.save(filen2)
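A hedged call sketch with hypothetical file names: the first argument is the playback-statistics XML read by XMLParser.loadXML (with a JSON fallback), the second the output .xls; including 'h' in settings adds the play-history sheet:

main('playback-statistics.xml', 'stats.xls', {'h': True})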
Example #31
import XMLParser as XP

g = XP.parse_file("molecule_data/CID_962.xml")

for (u,n) in g.node_dict.iteritems():
  print "(%s,%d)" % (n.label,g.index_dict[u])

print g.adj_matrix
Example #32
# Not implemented

# 2. Get stuff out of the DTD1
elements = DTDParser.getElements(exampleDTD1)
dbHandler = DBHandler.DBHandler()
dbHandler.createNewTable(elements)
print 'DTD parser output:'
print elements
print ''

# 3. Get stuff out of the DTD2
# Not implemented

# 3. Fill database with stuff from the XML1 document and return the database
# Basic and very hacky version works, f**k you recursion

XMLParser.parseXML(exampleXML, dbHandler) # parses the xml doc and inserts rows into the sqlite db
print 'Database content after parsing xml doc:'
print dbHandler.executeQuery('select * from xmldata')

# 4. Take mappings from the user
# Not implemented

# 5. Construct the XML2 document
# Not implemented

# 6. Validate XML2 document against DTD2
# Not implemented

# cleanup
dbHandler.closeCursor()
Example #33
from XMLParser import XMLParser as parser
import XMLParser
# import XMLParser

dummy_object = XMLParser.XMLParserObject(
    r'D:\Guru\Project\Project Tasks\VBAMacro\ClearEmptyPages\Macro Enabled Word Document - Copy\word\document.xml'
)
string_object = parser.xmlToString(parser_object=dummy_object)
# print(string_object)
parser.generateXmlObject(parser_object=dummy_object)
tag_elements = parser.findElementsByTagName(parser_object=dummy_object,
                                            tag_name='w')
for element in tag_elements:
    print(element)
Example #34
	def __init__(self, data):

		"""
		Class Variables:
			self.data: data which is to be plotted.

			self.x_data: slice of the self.data along the x-axis
			self.y_data: slice of the self.data along the y-axis
			self.z_data: slice of the self.data along the z-axis.

			self.minimum: Initialised with the value 0, the default start of the data slice
			self.maximum: Initialised with the value 1000, the default end of the data slice. Together these mean the
					initial plot data will be plotted from data[self.minimum: self.maximum]

			Their values can be changed from the "Go Plot!!" button present at the end of the Frame.

			self.log_panel: wx.Panel which will have the standard input and out bound to it.
			self.New_Tab: wx.Notebook which is opened as a new tab whenever the new tab option is clicked from the file menu.

		"""
		
		self.selected_checkboxes = list()
		self.axis_3d = True
		self.tab_count = 0
		self.minimum = 0
		self.maximum = 1000
		self.data = data
		self.x_data= None
		self.y_data= None
		self.z_data= None
		self.base_axis = None
		
		wx.Frame.__init__(self, None, -1, size=(800,600), pos=((wx.DisplaySize()[0])/2,(wx.DisplaySize()[1])/2), style=wx.MAXIMIZE_BOX | wx.RESIZE_BORDER | wx.SYSTEM_MENU | wx.CAPTION | wx.CLOSE_BOX)
		self.Button_vbox= wx.BoxSizer(wx.VERTICAL)

		#Splitter window
		self.window= wx.SplitterWindow(self, wx.ID_ANY, style=wx.SP_3D | wx.SP_BORDER, size=(800,600))	
		
		
		#Two panels
		self.left_panel = wx.Panel(self.window, wx.ID_ANY)
		self.right_panel = wx.Panel(self.window, wx.ID_ANY)
		



		#Notebook on which the matplotlib panel will be inserted
		self.New_Tab = fnb.FlatNotebook(self.right_panel, style=fnb.FNB_TABS_BORDER_SIMPLE|fnb.FNB_VC71)


		font = wx.Font(6, wx.SWISS, wx.NORMAL, wx.NORMAL, False, u'Comic Sans MS')
		font_bottom = wx.Font(7, wx.FONTFAMILY_TELETYPE, wx.FONTSTYLE_NORMAL, wx.FONTWEIGHT_BOLD, True, u'Comic Sans MS')

		self.matplotlib_panel= MatplotlibPanel(self.New_Tab, self.tab_count, self.data, self.minimum, self.maximum)
		self.New_Tab.AddPage(self.matplotlib_panel, "Tab %s"%self.tab_count)
		self.tab_count += 1

		#This panel will have all the variables present in the data file
		self.wxpanel= wx.PyScrolledWindow(self.left_panel, -1,)
		self.wxpanel.SetFont(font_bottom)
		self.wxpanel.SetBackgroundColour("DARKCYAN")
		self.log_window = wx.TextCtrl(self.left_panel, wx.ID_ANY, size=(300, 150), style = wx.TE_MULTILINE|wx.VSCROLL|wx.TE_BESTWRAP| wx.TE_WORDWRAP)
		self.log_window.SetFont(font)

		#This method populates the variables present in the file into the scrolled window
		self.checkbox_list = list()
		self.populate_variables(self.data, self.wxpanel, self.checkbox_list)


		self.vbox_left = wx.BoxSizer(wx.VERTICAL)
		self.vbox_left.Add(self.log_window, 0, wx.EXPAND| wx.ALL, 2)
		self.vbox_left.Add(self.wxpanel, 1, wx.EXPAND| wx.ALL, 2)
		self.left_panel.SetSizer(self.vbox_left)
		
		self.vbox_right = wx.BoxSizer(wx.VERTICAL)
		self.vbox_right.Add(self.New_Tab, 20, wx.EXPAND| wx.ALL, 1)
		self.right_panel.SetSizer(self.vbox_right)
		
		
		#This part generates the menu from the menu.xml present in the same directory
		menudata = XMLParser.xml_data("menu.xml")
		XMLParser.createMenus(self, menudata, self)

		sizer = wx.BoxSizer(wx.VERTICAL)
		self.window.SplitVertically(self.left_panel, self.right_panel)
		sizer.Add(self.window, 1, wx.EXPAND, 0)
		self.SetSizer(sizer)
		sizer.Fit(self)
		
		#This part redirects the standard output and standard error to the console embedded in the wx.Frame
		redir = RedirectText(self.log_window)
		sys.stdout = redir
		sys.stderr = redir
#		self.SetSizer(self.hbox)
		self.SetBackgroundColour("light blue")
		self.statusbar = self.CreateStatusBar()
		self.Centre()
		self.Show()
Beispiel #35
0
import normalization
import denormalization
import generateTargetVariable
import XMLParser
import plotGridAndBound


xNumGrid = 19
yNumGrid = 19
classMappingDict = {'dog': 0, 'cat' : 1}



inpFilePic = "D:/Assignments/Sem 2/Deep learning/Project/Yolo/dl_project/sample_files/twoObjectsCorrect.jpg"
inpFileXML = "D:/Assignments/Sem 2/Deep learning/Project/Yolo/dl_project/sample_files/twoObjectsCorrect.xml"
outputImg = "normalized_img.jpg"

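# Parse the annotation XML into an image dict and object list, then build the YOLO target array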
imageDict, ObjList = XMLParser.parseXMLtoDict(inpFileXML)
targetArray = generateTargetVariable.genTargetArray(inpFilePic,imageDict, ObjList,xNumGrid,yNumGrid,classMappingDict)



##generate new image
#imageResize(inpFilePic,outputImg,29,29)

# BB



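# Re-parse the annotations and draw a 3x3 grid with the bounding boxes on the image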
filepath = inpFilePic
imageDict, objectList = XMLParser.parseXMLtoDict(inpFileXML)
gridImg = plotGridAndBound.plotGridOnImg(filepath,3,3,objectList)
gridImg.savefig("griddedImage.jpg")
Beispiel #36
0
from StateGraph import *
from XMLParser import *

graph = XMLParser(graph_file="sample1a.xml", debug=False).parse()

# Check current state
print("Current State: {}".format(graph.get_current_state().name))

# Simulate an action
print("Simulating Action of VoiceCommand hello")
graph.apply_action(ActionType.VOICE_COMMAND, 'hello')

# Check current state
print("Current State: {}".format(graph.get_current_state().name))


"""
Output:

Parsing...
State:
	Name: Root state
	StateActions: 1 actions
		StateAction: (Type: voice_command, Value: hello, To: State that says hello back)
	Responses: 0 responses
State:
	Name: State that says hello back
	StateActions: 0 actions
	Responses: 3 responses
		Response: (Name: Saying Hello Back with LED, Type: led, Value: Some Random LED Value)
		Response: (Name: Sleeping for 5 seconds, Type: sleep, Value: 5000)
Beispiel #37
0
import XMLParser

XMLParser.run()
Beispiel #38
0
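        # Lexicon baseline: map each token's (positive, negative) counts to a single sentiment label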
        for iob in traind['iob']:
            polarities = semeval_util.create_sentiment_sequence(
                iob, senti_dictionary, negate_wds)
            translated = []
            for p, n in polarities:
                if p > n:
                    translated.append('positive')
                elif n > p:
                    translated.append('negative')
                else:
                    translated.append('neutral')
            results.append(translated)
        semeval_util.compute_sent_acc(traind['polarity'], results)
        XMLParser.create_xml(traind['orig'],
                             traind['iob'],
                             traind['id'],
                             traind['idx'],
                             sentiments=results,
                             outfile='baseline.xml')
        sys.exit()
    else:
        results = task4_stask2.train_and_trial(train_file, test_file)

    #create results file
    f = open(test_file, 'rb')
    testd = cPickle.load(f)
    f.close()
    XMLParser.create_xml(testd['orig'],
                         testd['iob'],
                         testd['id'],
                         testd['idx'],
                         sentiments=results)
Beispiel #39
0
                                            gridCol, total_grid_rows,
                                            total_grid_cols)

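            # Record the predicted class name, class index, class probability and objectness score for this grid cell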
            eachObj['name'] = reverseMappingDict[classLabelPredsEachGrid[
                gridRow, gridCol]]
            eachObj['intClass'] = classLabelPredsEachGrid[gridRow, gridCol]
            eachObj['probClass'] = classProbsEachGrid[gridRow, gridCol]
            eachObj['ObjectnessProb'] = probOfObjectPresent[gridRow, gridCol,
                                                            0]
            objectList.append(eachObj)

    return objectList


if __name__ == '__main__':

    xmlFile = "C:/Users/ntihish/Documents/IUB/Deep Learning/Project/Git Repo/product-recognition/sample_files/twoObjectsCorrect.xml"

    imgFile = "C:/Users/ntihish/Documents/IUB/Deep Learning/Project/Git Repo/product-recognition/sample_files/twoObjectsCorrect.jpg"

    imageDict, objectList = XMLParser.parseXMLtoDict(xmlFile)
    TargetArr = generateTargetVariable.genTargetArray(imgFile, imageDict,
                                                      objectList, 3, 3, {
                                                          'dog': 0,
                                                          'cat': 1
                                                      })

    objectList = decodePredArr(imageDict, TargetArr, classMappingDict)
    gridImg = plotGridAndBound.plotGridOnImg(imgFile, 3, 3, objectList)
    gridImg.savefig("griddedImage")
Beispiel #40
0
def plotFromXML(fileName, simulationTime, chemicalList):
	historyFile = getHistoryFileName(fileName)
	sim = XMLParser.getSimulator(fileName)
	sim.simulate(int(simulationTime), historyFile)
	sim.plot(chemicalList)
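
# Example usage (hypothetical file and chemical names):
# plotFromXML("reactions.xml", 100, ["H2O", "CO2"])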