Esempi in Python per raw_doc_lengths

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: utils

Metodo/funzione: raw_doc_lengths

Esempi su hotexamples.com: 8

raw_doc_lengths in Python: 8 esempi trovati. Questi sono i migliori esempi reali in Python di utils.raw_doc_lengths, estratti da progetti open source. Puoi valutarli per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: analysis.py Progetto: riccardoangius/pairses

def overall_collection_stats():
    """Print summary statistics of document lengths across all prefixes.

    Pools the values of ``raw_doc_lengths(prefix)`` for every entry of
    ``all_prefixes`` and prints the mean, standard deviation and median of
    the pooled lengths. It then prints the mean of the per-prefix
    statistics obtained from ``length_stats``.

    Returns nothing; the results are written to standard output.
    """
    doc_lengths = []
    for prefix in all_prefixes:
        doc_lengths.extend(raw_doc_lengths(prefix).values())

    # print(...) with one pre-formatted string produces the same output
    # under the Python 2 print statement and the Python 3 print function.
    print("All documents length mean: %.2f" % n.mean(doc_lengths))
    print("All documents length std: %.2f" % n.std(doc_lengths))
    print("All documents length median: %.2f" % n.median(doc_lengths))

    # Transpose the per-prefix result tuples into three parallel sequences.
    medlist, meanlist, stdlist = zip(*map(length_stats, all_prefixes))
    print("Mean of means: %.2f" % n.mean(meanlist))
    print("Mean of std: %.2f" % n.mean(stdlist))
    print("Mean of median: %.2f" % n.mean(medlist))

Esempio n. 2

Mostra file

File: analysis.py Progetto: adamar/wikidump

def overall_collection_stats():
  """Print summary statistics of document lengths across all prefixes.

  Pools the values of ``raw_doc_lengths(prefix)`` for every entry of
  ``all_prefixes`` and prints the mean, standard deviation and median of
  the pooled lengths. It then prints the mean of the per-prefix
  statistics obtained from ``length_stats``.

  Returns nothing; the results are written to standard output.
  """
  doc_lengths = []
  for prefix in all_prefixes:
    doc_lengths.extend(raw_doc_lengths(prefix).values())

  # print(...) with one pre-formatted string produces the same output
  # under the Python 2 print statement and the Python 3 print function.
  print("All documents length mean: %.2f" % n.mean(doc_lengths))
  print("All documents length std: %.2f" % n.std(doc_lengths))
  print("All documents length median: %.2f" % n.median(doc_lengths))

  # Transpose the per-prefix result tuples into three parallel sequences.
  medlist, meanlist, stdlist = zip(*map(length_stats, all_prefixes))
  print("Mean of means: %.2f" % n.mean(meanlist))
  print("Mean of std: %.2f" % n.mean(stdlist))
  print("Mean of median: %.2f" % n.mean(medlist))

Esempio n. 3

Mostra file

File: analysis.py Progetto: adamar/wikidump

def doclength_histogram(path, prefix, num_bins=1000):
  """Plot a histogram of document lengths for ``prefix`` and save it.

  Args:
    path: Destination passed to ``p.savefig``.
    prefix: Key passed to ``raw_doc_lengths``; also shown in the title.
    num_bins: Number of bin edges to generate (default 1000, the value
      the original code hard-coded).

  The bin edges are evenly spaced from ``upper_limit / num_bins`` up to
  ``upper_limit``, where ``upper_limit`` is the mean plus three standard
  deviations of the lengths. The upper limit is also printed.
  """
  # list() so a Python 3 dict view is turned into a proper 1-D array.
  values = p.array(list(raw_doc_lengths(prefix).values()))
  bin_upper_limit = p.mean(values) + 3 * p.std(values)
  print("UL: " + str(bin_upper_limit))
  bin_width = bin_upper_limit / float(num_bins)
  bins = p.array(range(1, num_bins + 1)) * bin_width
  p.hist(values, bins)
  p.xlabel('Document size (unicode codepoints)')
  p.ylabel('Number of documents')
  p.title('Document Size Histogram for %s' % prefix)
  p.savefig(path, dpi=72)
  # Close the figure so that successive calls do not draw on the same axes.
  p.close()

Esempio n. 4

Mostra file

File: analysis.py Progetto: riccardoangius/pairses

def doclength_histogram(path, prefix, num_bins=1000):
    """Plot a histogram of document lengths for ``prefix`` and save it.

    Args:
        path: Destination passed to ``p.savefig``.
        prefix: Key passed to ``raw_doc_lengths``; also shown in the title.
        num_bins: Number of bin edges to generate (default 1000, the value
            the original code hard-coded).

    The bin edges are evenly spaced from ``upper_limit / num_bins`` up to
    ``upper_limit``, where ``upper_limit`` is the mean plus three standard
    deviations of the lengths. The upper limit is also printed.
    """
    # list() so a Python 3 dict view is turned into a proper 1-D array.
    values = p.array(list(raw_doc_lengths(prefix).values()))
    bin_upper_limit = p.mean(values) + 3 * p.std(values)
    print("UL: " + str(bin_upper_limit))
    bin_width = bin_upper_limit / float(num_bins)
    bins = p.array(range(1, num_bins + 1)) * bin_width
    p.hist(values, bins)
    p.xlabel('Document size (unicode codepoints)')
    p.ylabel('Number of documents')
    p.title('Document Size Histogram for %s' % prefix)
    p.savefig(path, dpi=72)
    # Close the figure so that successive calls do not draw on the same axes.
    p.close()

Esempio n. 5

Mostra file

File: analysis.py Progetto: adamar/wikidump

def length_stats(prefix):
  """Return ``(median, mean, std)`` of the document lengths for ``prefix``.

  The lengths are the values of the mapping returned by
  ``raw_doc_lengths(prefix)``.
  """
  # Materialise the values: on Python 3 ``.values()`` is a view, which the
  # array-based statistics functions cannot consume directly.
  doc_lengths = list(raw_doc_lengths(prefix).values())
  median = n.median(doc_lengths)
  mean = n.mean(doc_lengths)
  std = n.std(doc_lengths)
  return median, mean, std

Esempio n. 6

Mostra file

def docs_under_thresh(prefix, thresh):
    """Return how many documents for ``prefix`` are shorter than ``thresh``.

    A document counts when its length, taken from the values of
    ``raw_doc_lengths(prefix)``, is strictly less than ``thresh``.
    """
    doc_lengths = raw_doc_lengths(prefix).values()
    # Count lazily: ``len(filter(...))`` fails on Python 3, where filter()
    # returns an iterator, and builds a throwaway list on Python 2.
    return sum(1 for length in doc_lengths if length < thresh)

Esempio n. 7

Mostra file

def indices_under_thresh(prefix, thresh):
    """Return the keys of documents for ``prefix`` shorter than ``thresh``.

    Iterates the (key, length) pairs of ``raw_doc_lengths(prefix)`` and
    keeps each key whose length is strictly less than ``thresh``.
    Presumably the keys are document ids -- confirm against
    ``raw_doc_lengths``.
    """
    # ``.items()`` exists on both Python 2 and 3 (``.iteritems()`` is 2-only);
    # the loop names avoid shadowing the builtins ``id`` and ``len``.
    doc_lengths = raw_doc_lengths(prefix).items()
    return [doc_id for (doc_id, length) in doc_lengths if length < thresh]

Esempio n. 8

Mostra file

File: analysis.py Progetto: riccardoangius/pairses

def length_stats(prefix):
    """Return ``(median, mean, std)`` of the document lengths for ``prefix``.

    The lengths are the values of the mapping returned by
    ``raw_doc_lengths(prefix)``.
    """
    # Materialise the values: on Python 3 ``.values()`` is a view, which the
    # array-based statistics functions cannot consume directly.
    doc_lengths = list(raw_doc_lengths(prefix).values())
    median = n.median(doc_lengths)
    mean = n.mean(doc_lengths)
    std = n.std(doc_lengths)
    return median, mean, std