# NOTE(review): the line below is a whitespace-collapsed fragment -- its
# original line breaks and indentation are lost, and the statement that opens
# the branch paired with `else:` (presumably a test for the header row) starts
# before this chunk. Restore the layout from the original file before running.
#
# What the statements do, in order:
#   * Header branch: keep the row as `header` and record each column name's
#     position in `columns` (name -> index).
#   * `else` branch: read htid, genrecounts (converted to int), title, author,
#     datetype, startdate, enddate and imprintdate from `row` through
#     `columns`, then pass the four date fields to utils.infer_date().
#   * Append (date, author, title, genrecounts, htid) to centuries['18c'] when
#     1699 < date < 1800, else to centuries['19c'] when date < 1900, else to
#     centuries['20c'] when date < 1923. Rows dated 1923 or later are not
#     stored anywhere.
#   * Initialise `pagecenturies` and `totallength`, then begin iterating over
#     centuries.items(); the loop body lies beyond this fragment.
#
# NOTE(review): the '19c' test has no lower bound, so any date of 1699 or
# lower fails the first test and is then filed under '19c'. Confirm whether
# `1800 <= date < 1900` was intended.
header = row for idx, colhead in enumerate(header): columns[colhead] = idx else: htid = row[columns['htid']] genrecounts = int(row[columns['genrecounts']]) # This is the number of pages *in the genre.* title = row[columns['title']] author = row[columns['author']] datetype = row[columns['datetype']] startdate = row[columns['startdate']] enddate = row[columns['enddate']] imprintdate = row[columns['imprintdate']] date = utils.infer_date(datetype, startdate, enddate, imprintdate) if date > 1699 and date < 1800: centuries['18c'].append( (date, author, title, genrecounts, htid)) elif date < 1900: centuries['19c'].append( (date, author, title, genrecounts, htid)) elif date < 1923: centuries['20c'].append( (date, author, title, genrecounts, htid)) pagecenturies = dict() totallength = 0 for century, voltuples in centuries.items(): # Each voltuple represent a volume.
# NOTE(review): the line below is a whitespace-collapsed fragment -- its
# original line breaks and indentation are lost, the statement that opens the
# branch paired with `else:` (presumably a test for the header row) starts
# before this chunk, and the final comment is cut off mid-sentence.
#
# What the statements do, in order:
#   * Header branch: keep the row as `header` and record each column name's
#     position in `columns` (name -> index).
#   * `else` branch: read htid, genrecounts (converted to int), title, author,
#     datetype, startdate, enddate and imprintdate from `row` through
#     `columns`, then pass the four date fields to utils.infer_date().
#   * Append (date, author, title, genrecounts, htid) to centuries['18c'] when
#     1699 < date < 1800, else to centuries['19c'] when date < 1900, else to
#     centuries['20c'] when date < 1923. Rows dated 1923 or later are not
#     stored anywhere.
#   * Create `pagecenturies` and, for every century key in `centuries`, start
#     it off as an empty list; the rest of that loop lies beyond this fragment.
#
# NOTE(review): the '19c' test has no lower bound, so any date of 1699 or
# lower fails the first test and is then filed under '19c'. Confirm whether
# `1800 <= date < 1900` was intended.
header = row for idx, colhead in enumerate(header): columns[colhead] = idx else: htid = row[columns['htid']] genrecounts = int(row[columns['genrecounts']]) # This is the number of pages *in the genre.* title = row[columns['title']] author = row[columns['author']] datetype = row[columns['datetype']] startdate = row[columns['startdate']] enddate = row[columns['enddate']] imprintdate = row[columns['imprintdate']] date = utils.infer_date(datetype, startdate, enddate, imprintdate) if date > 1699 and date < 1800: centuries['18c'].append((date, author, title, genrecounts, htid)) elif date < 1900: centuries['19c'].append((date, author, title, genrecounts, htid)) elif date < 1923: centuries['20c'].append((date, author, title, genrecounts, htid)) pagecenturies = dict() for century, voltuples in centuries.items(): # Each voltuple represent a volume. pagecenturies[century] = list() # For each list of volumes associated with a century, we construct an associated
# NOTE(review): the line below is a whitespace-collapsed fragment -- its
# original line breaks and indentation are lost. `line` is read before it is
# assigned here, so it presumably comes from an enclosing loop over an input
# file that starts before this chunk; where that loop body ends relative to
# the `with open(...)` block cannot be determined from this view.
#
# What the statements do, in order:
#   * Strip trailing whitespace from `line`, split it on tabs, and take
#     field 0 as volid, field 1 as word and field 2 (converted to int) as count.
#   * Resolve the volume's date: reuse datedict[volid] when already cached;
#     otherwise, when volid is in `rows`, look up datetype/startdate/enddate/
#     textdate in `table`, call utils.infer_date() and cache the result in
#     datedict; otherwise print "<volid> missing in metadata." and use date 0.
#   * When 1699 < date < 1924, call add_counts(wordcounts, date, word, count);
#     otherwise add volid to the `undated` set.
#   * Open colors.tsv for writing (UTF-8) and emit one "year<TAB>word<TAB>count"
#     line for every (year, word) entry in `wordcounts`.
#
# NOTE(review): the date 0 assigned to a volume missing from the metadata is
# not stored in datedict, so the "missing in metadata." message is printed
# again for every input line that mentions that volume.
line = line.rstrip() fields = line.split('\t') volid = fields[0] word = fields[1] count = int(fields[2]) if volid in datedict: date = datedict[volid] elif volid in rows: datetype = table["datetype"][volid] firstdate = table["startdate"][volid] seconddate = table["enddate"][volid] textdate = table["textdate"][volid] date = utils.infer_date(datetype, firstdate, seconddate, textdate) datedict[volid] = date else: print(volid + " missing in metadata.") date = 0 if date > 1699 and date < 1924: add_counts(wordcounts, date, word, count) else: undated.add(volid) with open('/Volumes/TARDIS/work/forandrew/colors.tsv', mode = 'w', encoding = 'utf-8') as f: for year, subdictionary in wordcounts.items(): for word, count in subdictionary.items(): outline = str(year) + '\t' + word + '\t' + str(count) + '\n' f.write(outline)
# NOTE(review): the line below is a whitespace-collapsed fragment -- its
# original line breaks and indentation are lost. `line` is read before it is
# assigned here, so it presumably comes from an enclosing loop over an input
# file that starts before this chunk, and the fragment stops right after the
# header of the innermost `for` loop, whose body is not visible.
#
# What the statements do, in order:
#   * Strip trailing whitespace from `line`, split it on tabs, and take
#     field 0 as volid, field 1 as word and field 2 (converted to int) as count.
#   * Resolve the volume's date: reuse datedict[volid] when already cached;
#     otherwise, when volid is in `rows`, look up datetype/startdate/enddate/
#     textdate in `table`, call utils.infer_date() and cache the result in
#     datedict; otherwise print "<volid> missing in metadata." and use date 0.
#   * When 1699 < date < 1924, call add_counts(wordcounts, date, word, count);
#     otherwise add volid to the `undated` set.
#   * Open colors.tsv for writing (UTF-8) and start iterating over every
#     (year, word) entry in `wordcounts`.
#
# NOTE(review): the date 0 assigned to a volume missing from the metadata is
# not stored in datedict, so the "missing in metadata." message is printed
# again for every input line that mentions that volume.
line = line.rstrip() fields = line.split('\t') volid = fields[0] word = fields[1] count = int(fields[2]) if volid in datedict: date = datedict[volid] elif volid in rows: datetype = table["datetype"][volid] firstdate = table["startdate"][volid] seconddate = table["enddate"][volid] textdate = table["textdate"][volid] date = utils.infer_date(datetype, firstdate, seconddate, textdate) datedict[volid] = date else: print(volid + " missing in metadata.") date = 0 if date > 1699 and date < 1924: add_counts(wordcounts, date, word, count) else: undated.add(volid) with open('/Volumes/TARDIS/work/forandrew/colors.tsv', mode='w', encoding='utf-8') as f: for year, subdictionary in wordcounts.items(): for word, count in subdictionary.items():