コード例 #1
0
def getMappingDicts_reGen(corpusdir, mapsdir, cons):
    """Return the three mapping dicts, regenerating them when the cache is stale.

    The pickled mappings under ``mapsdir`` are reused only when all of the
    following hold:

    * ``<corpusdir>/constraint.set`` exists,
    * ``checkSame`` reports that its content matches ``cons``, and
    * ``word_wid.dict``, ``wid_did.dict`` and ``did_doc.dict`` all exist
      in ``mapsdir``.

    In every other case the mappings are obtained from
    ``getNewMappingDicts(corpusdir, mapsdir)``.  Whichever path is taken,
    ``cons`` is pickled to ``<corpusdir>/constraint.set`` before returning.

    Returns:
        A 3-tuple ``(word_wid_dic, wid_did_dic, did_doc_dic)``.
    """
    cons_path = corpusdir + "/constraint.set"

    # Stays None unless every precondition for trusting the cache is met.
    cached_paths = None
    if os.path.exists(cons_path):
        previous_cons = read_pickle(cons_path)
        if checkSame(cons, previous_cons):
            candidates = (
                mapsdir + "/word_wid.dict",
                mapsdir + "/wid_did.dict",
                mapsdir + "/did_doc.dict",
            )
            if all(os.path.exists(path) for path in candidates):
                cached_paths = candidates

    if cached_paths is not None:
        word_wid_path, wid_did_path, did_doc_path = cached_paths
        word_wid_dic = read_pickle(word_wid_path)
        wid_did_dic = read_pickle(wid_did_path)
        did_doc_dic = read_pickle(did_doc_path)
    else:
        # Missing or changed constraints, or an incomplete cache: rebuild.
        word_wid_dic, wid_did_dic, did_doc_dic = getNewMappingDicts(
            corpusdir, mapsdir)

    # Remember the constraints these mappings correspond to.
    write_pickle(cons, cons_path)
    return (word_wid_dic, wid_did_dic, did_doc_dic)
コード例 #2
0
def getMappingDicts(corpusdir, mapsdir):
    """Return the three mapping dicts, loading them from ``mapsdir`` if cached.

    When ``word_wid.dict``, ``wid_did.dict`` and ``did_doc.dict`` all exist
    in ``mapsdir`` they are unpickled and returned.  If any of them is
    missing, the mappings come from ``getNewMappingDicts(corpusdir, mapsdir)``
    instead.

    Returns:
        A 3-tuple ``(word_wid_dic, wid_did_dic, did_doc_dic)``.
    """
    cache_paths = (
        mapsdir + "/word_wid.dict",
        mapsdir + "/wid_did.dict",
        mapsdir + "/did_doc.dict",
    )

    # Guard clause: an incomplete cache means everything is rebuilt.
    if not all(os.path.exists(path) for path in cache_paths):
        word_wid_dic, wid_did_dic, did_doc_dic = getNewMappingDicts(
            corpusdir, mapsdir)
        return (word_wid_dic, wid_did_dic, did_doc_dic)

    word_wid_path, wid_did_path, did_doc_path = cache_paths
    word_wid_dic = read_pickle(word_wid_path)
    wid_did_dic = read_pickle(wid_did_path)
    did_doc_dic = read_pickle(did_doc_path)
    return (word_wid_dic, wid_did_dic, did_doc_dic)
コード例 #3
0
def getNewAddedCons(corpusdir, cons_set, cons_list):
    """Return the constraints in ``cons_set`` that were not saved previously.

    If ``<corpusdir>/constraint.set`` exists, its unpickled content is
    subtracted from ``cons_set`` (via ``cons_set.difference``).  If it does
    not exist, ``cons_set`` itself is returned (the same object, not a copy).

    As a side effect, ``cons_set`` is pickled to
    ``<corpusdir>/constraint.set`` and ``cons_list`` to
    ``<corpusdir>/constraint.list``, in that order.
    """
    set_path = corpusdir + "/constraint.set"
    list_path = corpusdir + "/constraint.list"

    if os.path.exists(set_path):
        previous_set = read_pickle(set_path)
        cons_added_set = cons_set.difference(previous_set)
    else:
        # Nothing saved before, so every constraint counts as newly added.
        cons_added_set = cons_set

    # Persist the current constraints for the next comparison.
    write_pickle(cons_set, set_path)
    write_pickle(cons_list, list_path)
    return cons_added_set
コード例 #4
0
ファイル: resume_topics.py プロジェクト: hxsebastien/topicmod
flags.define_int("num_topics", 0, "Current number of topics")

if __name__ == "__main__":
  flags.InitFlags()

  if re.search("doc", flags.update_strategy):
    update_strategy = 1
  elif re.search("term", flags.update_strategy):
    update_strategy = 0
  else:
    print "Wrong update strategy!"
    exit()

  # Build index if it doesn't already exist
  if os.path.exists(flags.mapping):
    index = read_pickle(flags.mapping)
  else:
    index = build_index(flags.corpus, flags.mapping)

  # Remove offending assignments
  if re.search("clear", flags.resume_type):
    # Read in constraints
    cons_set = get_constraints(flags.cons_file)
    print cons_set
    clear_assignments(flags.input_base, flags.output_base, index,
                      cons_set, update_strategy)
  elif re.search("split", flags.resume_type):
    # Read in constraints
    print flags.wordnet
    [cons_list, cons_set] = get_constraints_from_wn(flags.wordnet)
    print cons_set