def _run(args):
    """Compute TF-IDF values for the documents listed in ``args.docs``.

    Each non-blank line of ``args.docs`` is split on whitespace: the first
    field is taken as the document id and the remaining fields are handed to
    ``TFStats.accumulate`` together with ``args.ngram_order``.

    Two modes are supported, selected by ``args.accumulate_over_docs``:

    * False -- after each line the document id and its TF-IDF values are
      written to ``args.tf_idf_file`` and the TF stats are reset, so every
      document is scored on its own against ``idf_stats``.
    * True -- stats are accumulated over all lines, term stats are computed
      once at the end, the IDF stats are optionally written to
      ``args.output_idf_stats`` (which is then closed), and the TF-IDF
      values are written to ``args.tf_idf_file`` in a single call.

    Args:
        args: namespace-like object providing ``docs``, ``tf_idf_file``,
            ``input_idf_stats``, ``output_idf_stats``,
            ``accumulate_over_docs``, ``ngram_order``,
            ``tf_weighting_scheme``, ``idf_weighting_scheme`` and
            ``tf_normalization_factor``.

    Raises:
        RuntimeError: if no document line was processed.
    """
    tf_stats = tf_idf.TFStats()
    idf_stats = tf_idf.IDFStats()

    if args.input_idf_stats is not None:
        idf_stats.read(args.input_idf_stats)

    num_done = 0
    for line in args.docs:
        parts = line.strip().split()
        if not parts:
            # Blank / whitespace-only line: there is no document id to read.
            # Skip it instead of failing with IndexError on parts[0]; it is
            # not counted, so an all-blank input still raises below.
            continue
        doc = parts[0]
        tf_stats.accumulate(doc, parts[1:], args.ngram_order)

        if not args.accumulate_over_docs:
            # Write the document-id and the corresponding tf-idf values.
            print(doc, file=args.tf_idf_file, end=" ")
            tf_idf.write_tfidf_from_stats(
                tf_stats,
                idf_stats,
                args.tf_idf_file,
                tf_weighting_scheme=args.tf_weighting_scheme,
                idf_weighting_scheme=args.idf_weighting_scheme,
                tf_normalization_factor=args.tf_normalization_factor,
                expected_document_id=doc,
            )
            # Start from fresh TF stats for the next document.
            tf_stats = tf_idf.TFStats()
        num_done += 1

    if args.accumulate_over_docs:
        # idf_stats is passed along only when no IDF stats were read from
        # args.input_idf_stats -- presumably so that loaded IDF stats are
        # not modified by these documents (confirm against TFStats).
        tf_stats.compute_term_stats(
            idf_stats=idf_stats if args.input_idf_stats is None else None
        )

        if args.output_idf_stats is not None:
            idf_stats.write(args.output_idf_stats)
            args.output_idf_stats.close()

        tf_idf.write_tfidf_from_stats(
            tf_stats,
            idf_stats,
            args.tf_idf_file,
            tf_weighting_scheme=args.tf_weighting_scheme,
            idf_weighting_scheme=args.idf_weighting_scheme,
            tf_normalization_factor=args.tf_normalization_factor,
        )

    if num_done == 0:
        raise RuntimeError("Could not compute TF-IDF for any query documents")
def _run(args):
    """Read documents from ``args.docs`` and write their TF-IDF values.

    Every non-blank input line is whitespace-split into a document id (first
    field) and its tokens (remaining fields), which are accumulated into a
    ``TFStats`` object using ``args.ngram_order``.

    If ``args.accumulate_over_docs`` is false, the document id followed by
    that document's TF-IDF values is written to ``args.tf_idf_file`` right
    after the line is read, and the TF stats are re-created for the next
    line.  Otherwise the stats of all lines are kept, term stats are
    computed once after the loop, the IDF stats are written to
    ``args.output_idf_stats`` (and that file closed) when it is given, and
    the TF-IDF values are written in one call.

    Args:
        args: namespace-like object; the attributes read are ``docs``,
            ``tf_idf_file``, ``input_idf_stats``, ``output_idf_stats``,
            ``accumulate_over_docs``, ``ngram_order``,
            ``tf_weighting_scheme``, ``idf_weighting_scheme`` and
            ``tf_normalization_factor``.

    Raises:
        RuntimeError: if no document line was processed.
    """
    # Weighting options common to every write_tfidf_from_stats call below.
    weighting_opts = dict(
        tf_weighting_scheme=args.tf_weighting_scheme,
        idf_weighting_scheme=args.idf_weighting_scheme,
        tf_normalization_factor=args.tf_normalization_factor)

    tf_stats = tf_idf.TFStats()
    idf_stats = tf_idf.IDFStats()
    if args.input_idf_stats is not None:
        idf_stats.read(args.input_idf_stats)

    per_document = not args.accumulate_over_docs
    num_done = 0
    for line in args.docs:
        fields = line.strip().split()
        if not fields:
            # A blank line carries no document id; fields[0] would raise
            # IndexError.  Skip it without counting it as a processed
            # document, so empty input is still reported below.
            continue
        doc, words = fields[0], fields[1:]
        tf_stats.accumulate(doc, words, args.ngram_order)

        if per_document:
            # Write the document-id and the corresponding tf-idf values.
            print(doc, file=args.tf_idf_file, end=' ')
            tf_idf.write_tfidf_from_stats(
                tf_stats, idf_stats, args.tf_idf_file,
                expected_document_id=doc, **weighting_opts)
            # Each document is scored independently: reset the TF stats.
            tf_stats = tf_idf.TFStats()
        num_done += 1

    if args.accumulate_over_docs:
        # Hand idf_stats to compute_term_stats only when none were loaded
        # from args.input_idf_stats (NOTE(review): presumably to keep loaded
        # IDF stats untouched -- confirm against TFStats.compute_term_stats).
        if args.input_idf_stats is None:
            tf_stats.compute_term_stats(idf_stats=idf_stats)
        else:
            tf_stats.compute_term_stats(idf_stats=None)

        if args.output_idf_stats is not None:
            idf_stats.write(args.output_idf_stats)
            args.output_idf_stats.close()

        tf_idf.write_tfidf_from_stats(
            tf_stats, idf_stats, args.tf_idf_file, **weighting_opts)

    if num_done == 0:
        raise RuntimeError("Could not compute TF-IDF for any query documents")