Exemple #1
0
            # Treat this row as the header and build a column-name -> position
            # map, so later rows can be read by field name instead of index.
            # NOTE(review): the condition selecting this branch lies outside
            # this excerpt; presumably it fires for the first row only.
            header = row
            for idx, colhead in enumerate(header):
                columns[colhead] = idx

        else:
            # Data row: read each field by name through the `columns`
            # name -> index map.
            htid = row[columns['htid']]
            genrecounts = int(row[columns['genrecounts']])
            # This is the number of pages *in the genre.*

            title = row[columns['title']]
            author = row[columns['author']]
            datetype = row[columns['datetype']]
            startdate = row[columns['startdate']]
            enddate = row[columns['enddate']]
            imprintdate = row[columns['imprintdate']]
            # Collapse the four date fields into one value that is compared
            # against integer years below.
            # NOTE(review): infer_date is defined in `utils`, outside this
            # excerpt; presumably it returns a single year -- confirm.
            date = utils.infer_date(datetype, startdate, enddate, imprintdate)
            # File the volume under its century. Dates of 1923 or later match
            # no branch and are not stored.
            # NOTE(review): the '19c' test has no lower bound, so any date of
            # 1699 or earlier also lands in '19c' -- confirm this is intended.
            if date > 1699 and date < 1800:
                centuries['18c'].append(
                    (date, author, title, genrecounts, htid))
            elif date < 1900:
                centuries['19c'].append(
                    (date, author, title, genrecounts, htid))
            elif date < 1923:
                centuries['20c'].append(
                    (date, author, title, genrecounts, htid))

pagecenturies = dict()
totallength = 0
for century, voltuples in centuries.items():
    # Each voltuple represents a volume.
            # Treat this row as the header and build a column-name -> position
            # map, so later rows can be read by field name instead of index.
            # NOTE(review): the condition selecting this branch lies outside
            # this excerpt; presumably it fires for the first row only.
            header = row
            for idx, colhead in enumerate(header):
                columns[colhead] = idx

        else:
            # Data row: read each field by name through the `columns`
            # name -> index map.
            htid = row[columns['htid']]
            genrecounts = int(row[columns['genrecounts']])
            # This is the number of pages *in the genre.*

            title = row[columns['title']]
            author = row[columns['author']]
            datetype = row[columns['datetype']]
            startdate = row[columns['startdate']]
            enddate = row[columns['enddate']]
            imprintdate = row[columns['imprintdate']]
            # Collapse the four date fields into one value that is compared
            # against integer years below.
            # NOTE(review): infer_date is defined in `utils`, outside this
            # excerpt; presumably it returns a single year -- confirm.
            date = utils.infer_date(datetype, startdate, enddate, imprintdate)
            # File the volume under its century. Dates of 1923 or later match
            # no branch and are not stored.
            # NOTE(review): the '19c' test has no lower bound, so any date of
            # 1699 or earlier also lands in '19c' -- confirm this is intended.
            if date > 1699 and date < 1800:
                centuries['18c'].append((date, author, title, genrecounts, htid))
            elif date < 1900:
                centuries['19c'].append((date, author, title, genrecounts, htid))
            elif date < 1923:
                centuries['20c'].append((date, author, title, genrecounts, htid))

pagecenturies = dict()

for century, voltuples in centuries.items():
    # Each voltuple represents a volume.

    pagecenturies[century] = list()

    # For each list of volumes associated with a century, we construct an associated
		# Parse one tab-separated record: volume id, word, count.
		# A record with fewer than three fields raises IndexError, and a
		# non-integer count raises ValueError.
		line = line.rstrip()
		fields = line.split('\t')

		volid = fields[0]
		word = fields[1]
		count = int(fields[2])

		# Resolve the volume's date. `datedict` caches the result so that
		# infer_date runs at most once per volume id.
		if volid in datedict:
			date = datedict[volid]
		elif volid in rows:
			# NOTE(review): `rows` and `table` are defined outside this
			# excerpt; presumably the metadata index and its columns keyed
			# by volume id -- confirm.
			datetype = table["datetype"][volid]
			firstdate = table["startdate"][volid]
			seconddate = table["enddate"][volid]
			textdate = table["textdate"][volid]
			date = utils.infer_date(datetype, firstdate, seconddate, textdate)
			datedict[volid] = date
		else:
			# Not cached here, so the message repeats for every record of a
			# missing volume. A date of 0 fails the range test below, so the
			# volume is recorded as undated.
			print(volid + " missing in metadata.")
			date = 0

		# Only dates strictly between 1699 and 1924 are tallied; everything
		# else is recorded in `undated`.
		# NOTE(review): add_counts is defined elsewhere; presumably it adds
		# `count` to the tally for (date, word) -- confirm.
		if date > 1699 and date < 1924:
			add_counts(wordcounts, date, word, count)
		else:
			undated.add(volid)

# Dump the per-year word tallies as a three-column TSV: year, word, count.
with open('/Volumes/TARDIS/work/forandrew/colors.tsv', mode='w', encoding='utf-8') as f:
    for year, counts_for_year in wordcounts.items():
        # The year column is the same for every word of this year.
        year_field = str(year)
        for word, count in counts_for_year.items():
            f.write('\t'.join((year_field, word, str(count))) + '\n')
        # Parse one tab-separated record: volume id, word, count.
        # A record with fewer than three fields raises IndexError, and a
        # non-integer count raises ValueError.
        line = line.rstrip()
        fields = line.split('\t')

        volid = fields[0]
        word = fields[1]
        count = int(fields[2])

        # Resolve the volume's date. `datedict` caches the result so that
        # infer_date runs at most once per volume id.
        if volid in datedict:
            date = datedict[volid]
        elif volid in rows:
            # NOTE(review): `rows` and `table` are defined outside this
            # excerpt; presumably the metadata index and its columns keyed
            # by volume id -- confirm.
            datetype = table["datetype"][volid]
            firstdate = table["startdate"][volid]
            seconddate = table["enddate"][volid]
            textdate = table["textdate"][volid]
            date = utils.infer_date(datetype, firstdate, seconddate, textdate)
            datedict[volid] = date
        else:
            # Not cached here, so the message repeats for every record of a
            # missing volume. A date of 0 fails the range test below, so the
            # volume is recorded as undated.
            print(volid + " missing in metadata.")
            date = 0

        # Only dates strictly between 1699 and 1924 are tallied; everything
        # else is recorded in `undated`.
        # NOTE(review): add_counts is defined elsewhere; presumably it adds
        # `count` to the tally for (date, word) -- confirm.
        if date > 1699 and date < 1924:
            add_counts(wordcounts, date, word, count)
        else:
            undated.add(volid)

with open('/Volumes/TARDIS/work/forandrew/colors.tsv',
          mode='w',
          encoding='utf-8') as f:
    for year, subdictionary in wordcounts.items():
        for word, count in subdictionary.items():