Esempio n. 1
0
def main(args):
    """Read every input file and report how many contained ratings.

    ``args`` is handed to ``parseargs``; the ``files`` entry of the
    resulting map lists the inputs. When no files are given the process
    exits with status 0. Otherwise the value returned by
    ``read_all_files`` is printed as the number of files with ratings.
    """
    file_list = parseargs(args).get('files')
    if not file_list:
        # Nothing to process is not treated as an error.
        sys.exit(0)

    rated_count = read_all_files(file_list)
    print("\nNumber of files with ratings = %d" % rated_count)
Esempio n. 2
0
def main(args):
    """Construct a ``SubtopicMapper`` from the command-line options.

    Both the ``map`` and ``savemodel`` options are required; if either is
    missing a message is printed and the process exits with status 1.
    The optional ``alg`` option falls back to ``'svc'``. Only the first
    value of each option is used.
    """
    options = parseargs(args)
    map_values = options.get("map")
    model_values = options.get("savemodel")
    if not (map_values and model_values):
        print('Both map and savemodel must be specified...')
        sys.exit(1)

    alg_values = options.get("alg")
    algorithm = alg_values[0] if alg_values else 'svc'

    # The instance is not used further in the visible code; whatever work
    # is needed presumably happens in the constructor -- TODO confirm.
    mapper = SubtopicMapper(map_values[0], model_values[0], algorithm)
def main(args):
    """Print the mapped subtopics and subtopic column names for each file.

    Options read from the map returned by ``parseargs(args)``:

    * ``files``   -- input files; the process exits with status 0 if empty.
    * ``topic``   -- optional, defaults to ``"IT Assessment"``.
    * ``summary`` -- required summary file.
    * ``tmodel``  -- required topic model file.
    * ``smodel``  -- required subtopic model file.

    Only the first value of each single-valued option is used. A missing
    required option prints a message and exits with status 1.
    """
    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        sys.exit(0)

    topic = argsmap.get("topic")
    topic = topic[0] if topic else "IT Assessment"

    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print("Summary file must be specified...")
        sys.exit(1)
    summaryfile = summaryfile[0]
    summary_map = find_topics.get_summary_map(summaryfile)

    tmodelfile = argsmap.get("tmodel")
    if not tmodelfile:
        # The option key read above is "tmodel"; the message names the
        # same option so the user knows what to pass.
        print('Topic Model must be specified using --tmodel ...')
        sys.exit(1)
    tmodelfile = tmodelfile[0]
    (origmap, sorted_y, vectorizer, le,
     grid_search) = read_model_file(tmodelfile)
    # NOTE(review): the result is not used below; the call is kept in case
    # toc_entries has side effects -- confirm and remove if it is pure.
    topics = find_topics.toc_entries(origmap)
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

    subtopicReader = SubtopicReader(topic, mapper, summary_map)

    smodelfile = argsmap.get("smodel")
    if not smodelfile:
        print('Subtopic Model must be specified using --smodel ...')
        sys.exit(1)
    smodelfile = smodelfile[0]
    subtopicPredictor = SubtopicPredictor(smodelfile)

    for filename in files:
        subtopic_dict = subtopicReader.mapped_subtopics(
            filename, subtopicPredictor)
        # Queried after each file, exactly as before; whether the column
        # names depend on the file just processed is not visible here.
        subtopic_columns = subtopicReader.get_column_names(subtopicPredictor)
        print(json.dumps(subtopic_dict, indent=2))
        print('---------------------------------------')
        print(subtopic_columns)
Esempio n. 4
0
def main(args):
  """Process the input files in either "reveal" mode or ratings mode.

  Exits with status 0 when the ``files`` option is empty. When the
  ``reveal`` option is present as an empty list, the module-level
  ``read_all_files`` is called with ``True``. Otherwise a ``Ratings``
  instance reads the files, receiving the first value of the optional
  ``out`` and ``distinct`` options (or the falsy value that was stored
  when they are absent).
  """
  options = parseargs(args)

  file_list = options.get('files')
  if not file_list:
    sys.exit(0)

  # An empty list presumably means the flag was given without a value --
  # verify against parseargs.
  if options.get('reveal') == []:
    read_all_files(file_list, True)
    return

  out_values = options.get('out')
  outfile = out_values[0] if out_values else out_values
  distinct_values = options.get('distinct')
  distinctfile = distinct_values[0] if distinct_values else distinct_values

  Ratings().read_all_files(file_list, False, outfile, distinctfile)
Esempio n. 5
0
def main(args):
    """Run ``process_ratings_for_file`` on every input file.

    Exits with status 0 when the ``files`` option is empty. The ``rmap``
    option is required: if it is missing, or its first value is empty, a
    message is printed and the process exits with status 1. A single
    ``Ratings`` object built from that file is shared by all inputs.
    """
    options = parseargs(args)

    file_list = options.get('files')
    if not file_list:
        sys.exit(0)

    rmap_values = options.get("rmap")
    ratings_mapper_file = rmap_values[0] if rmap_values else None
    if not ratings_mapper_file:
        print(
            "Ratings Mapper File file name must be entered using the --rmap option..."
        )
        sys.exit(1)

    ratings = Ratings(ratings_mapper_file)

    banner = "============================"
    for path in file_list:
        print("Processing file: " + path)
        print(banner)
        process_ratings_for_file(ratings, path)
Esempio n. 6
0
def main(args):
  """Read subtopics from every input file and optionally summarise them.

  Options read from the map returned by ``parseargs(args)``:

  * ``files``         -- inputs; the process exits with status 0 if empty.
  * ``topic``         -- optional, defaults to ``"IT Assessment"``.
  * ``summary``       -- required summary file.
  * ``model``         -- required model file.
  * ``print_detail``  -- presence is forwarded to ``read_all_files``.
  * ``print_summary`` -- presence triggers ``print_summary()``.

  A missing required option prints a message and exits with status 1.
  """
  options = parseargs(args)

  file_list = options.get('files')
  if not file_list:
    sys.exit(0)

  topic_values = options.get("topic")
  topic = topic_values[0] if topic_values else "IT Assessment"

  summary_values = options.get("summary")
  if not summary_values:
    print("Summary file must be specified...")
    sys.exit(1)
  summary_map = find_topics.get_summary_map(summary_values[0])

  model_values = options.get("model")
  if not model_values:
    print('Model must be specified...')
    sys.exit(1)
  origmap, sorted_y, vectorizer, le, grid_search = read_model_file(model_values[0])
  # Result is not used below; the call is kept to preserve behavior.
  topics = find_topics.toc_entries(origmap)
  mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

  reader = SubtopicReader(topic, mapper, summary_map)
  show_detail = 'print_detail' in options
  # Return value is not used below; kept bound as in the original flow.
  all_subtopics = reader.read_all_files(file_list, show_detail)
  if 'print_summary' in options:
    reader.print_summary()
Esempio n. 7
0
def main(args):
    """Read all input files using a topic mapper and a summary map.

    Options read from the map returned by ``parseargs(args)``:

    * ``files``   -- inputs; the process exits with status 0 if empty.
    * ``summary`` -- required summary file.
    * ``model``   -- required model file.

    Only the first value of ``summary`` and ``model`` is used. A missing
    required option prints a message and exits with status 1.
    """
    argsmap = parseargs(args)

    files = argsmap.get('files')
    if not files:
        sys.exit(0)

    # Validate before indexing: without this check a missing option would
    # surface as a TypeError from subscripting None instead of a usable
    # message.
    summaryfile = argsmap.get("summary")
    if not summaryfile:
        print('Summary file must be specified...')
        sys.exit(1)
    summaryfile = summaryfile[0]
    summary_map = get_summary_map(summaryfile)

    modelfile = argsmap.get("model")
    if not modelfile:
        print('Model must be specified...')
        sys.exit(1)
    modelfile = modelfile[0]
    (origmap, sorted_y, vectorizer, le,
     grid_search) = read_model_file(modelfile)
    # NOTE(review): the result is not used below; the call is kept in case
    # toc_entries has side effects -- confirm and remove if it is pure.
    topics = toc_entries(origmap)
    mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

    read_all_files(files, mapper, summary_map)
  print("Overall Prediction")
  print("==================")
  for case in test_cases():
    prediction, score, type = predictor.get_subtopic(case)
    print("%-50s: %30s - (%d, %s)" % (case, prediction, score, type))
  print("==================")
  print("")

def run_guess_tests(predictor):
  """Print the guess prediction for every case from ``test_cases()``.

  For each case, ``predictor.get_subtopic_guess`` must return a
  3-tuple, which is printed as one formatted row between two banner
  lines under a "Guess Prediction" heading, followed by an empty line.
  """
  banner = "=================="
  print("Guess Prediction")
  print(banner)
  for case in test_cases():
    guess, score, kind = predictor.get_subtopic_guess(case)
    print("%-50s: %30s - (%0.3f, %s)" % (case, guess, score, kind))
  print(banner)
  print("")

if __name__ == '__main__':
  # Script entry point: build a SubtopicPredictor from the model file named
  # by the "model" option, then run both test printers against it.
  import sys
  # Drop the program name; only the options are handed to parseargs.
  args = sys.argv[1:]
  argsmap = parseargs(args)
  modelfile = argsmap.get("model")
  if (not modelfile):
    # The model option is mandatory; exit with a non-zero status.
    print('Model must be specified...')
    sys.exit(1)
  # Only the first value given for the option is used.
  modelfile = modelfile[0]
  predictor = SubtopicPredictor(modelfile)
  run_tests(predictor)
  run_guess_tests(predictor)

Esempio n. 9
0
def main(args):
  """Collect all options and hand the files to ``get_headers_for_files``.

  Options read from the map returned by ``parseargs(args)``:

  * ``files``   -- inputs; the process exits with status 0 if empty.
  * ``summary`` -- required summary file.
  * ``model``   -- required model file.
  * ``nosplit`` -- when present as an empty list, the split count is 0.
  * ``split``   -- split count as an integer, default 4 (ignored when
    ``nosplit`` is set).
  * ``NL``      -- overrides the module global ``NEWLINE_WITHIN_COLUMN``.
  * ``out``     -- optional output file.
  * ``err``     -- required exception file.
  * ``rmap``    -- required ratings mapper file.
  * ``fd``      -- overrides the module global ``CSV_FIELD_DELIMITER``.
  * ``fdr``     -- overrides the module global ``FD_REPLACED``.
  * ``smodels`` / ``stopics`` -- forwarded unchanged.

  A missing required option prints a message and exits with status 1.
  """
  global NEWLINE_WITHIN_COLUMN, CSV_FIELD_DELIMITER, FD_REPLACED

  options = parseargs(args)

  file_list = options.get('files')
  if not file_list:
    sys.exit(0)

  summary_values = options.get("summary")
  if not summary_values:
    print('Summary file must be specified...')
    sys.exit(1)
  summary_map = get_summary_map(summary_values[0])

  model_values = options.get("model")
  if not model_values:
    print('Model must be specified...')
    sys.exit(1)
  origmap, sorted_y, vectorizer, le, grid_search = read_model_file(model_values[0])
  topics = toc_entries(origmap)
  mapper = Mapper(origmap, sorted_y, vectorizer, le, grid_search)

  # The flag counts as set only when stored as an empty list.
  nosplit = options.get('nosplit') == []
  if nosplit:
    topic_split_times = 0
  else:
    split_values = options.get('split')
    topic_split_times = int(split_values[0]) if split_values else 4

  # Newline character used inside multiline columns.
  nl_values = options.get('NL')
  if nl_values and nl_values[0]:
    NEWLINE_WITHIN_COLUMN = nl_values[0]

  out_values = options.get("out")
  outfile = out_values[0] if out_values else out_values

  err_values = options.get("err")
  exfile = err_values[0] if err_values else None
  if not exfile:
    print("Exception file name must be entered using the --err option...")
    sys.exit(1)

  rmap_values = options.get("rmap")
  ratings_mapper_file = rmap_values[0] if rmap_values else None
  if not ratings_mapper_file:
    print("Ratings Mapper File file name must be entered using the --rmap option...")
    sys.exit(1)

  # Built before the delimiter globals are updated, as in the original flow.
  ratings = Ratings(ratings_mapper_file)

  fd_values = options.get('fd')
  if fd_values and fd_values[0]:
    CSV_FIELD_DELIMITER = fd_values[0]

  fdr_values = options.get('fdr')
  if fdr_values and fdr_values[0]:
    FD_REPLACED = fdr_values[0]

  smodels = options.get("smodels")
  stopics = options.get("stopics")
  get_headers_for_files(
      file_list, topics, mapper, summary_map, outfile, exfile,
      nosplit, topic_split_times, ratings, smodels, stopics)