def printRecordsJson(located_recs):
    """Print all records to stdout as a single indented JSON array.

    Args:
        located_recs: iterable of (record, coder, locatable) triples. When
            coder is truthy, locatable must offer getLatLon() and be
            convertible with str(); otherwise it is not touched.
    """
    def iso_day(d):
        # Zero-padded YYYY-MM-DD built from the year/month/day attributes.
        return '%04d-%02d-%02d' % (d.year, d.month, d.day)

    output = []
    for r, coder, locatable in located_recs:
        extracted = {'date_range': [None, None]}
        entry = {
            'id': r.photo_id(),
            'folder': r.location().replace('Folder: ', ''),
            'date': record.CleanDate(r.date()),
            'title': record.CleanTitle(r.title()),
            'description': r.description(),
            'url': r.preferred_url,
            'extracted': extracted,
        }

        # The note is optional; the key is omitted when it is falsy.
        if r.note():
            entry['note'] = r.note()

        first_day, last_day = r.date_range()
        extracted['date_range'] = [iso_day(first_day), iso_day(last_day)]

        # Location details are only emitted when a coder claimed the record.
        if coder:
            extracted['latlon'] = locatable.getLatLon()
            extracted['located_str'] = str(locatable)
            extracted['technique'] = coder

        output.append(entry)

    print(json.dumps(output, indent=2))
def codeRecord(self, r):
    """Try to derive a locatable from the street text in a record's title.

    The cleaned, lower-cased title is first searched for an exact address
    (self._addr_re). Failing that, each pattern in self._forms is tried in
    order; the first one that matches decides the result and bumps the
    matching counter in self._stats (keyed by the 1-based pattern index as
    a string).

    Returns:
        A locatable built by coders.locatable, or None when nothing matched.
    """
    cleaned = coders.sf_streets.clean_street(
        record.CleanTitle(r.title()).lower())

    # An exact address wins, unless it is one we were told to reject.
    addr_match = re.search(self._addr_re, cleaned)
    if addr_match and not should_reject_address(addr_match.group(0)):
        return coders.locatable.fromAddress(addr_match.group(0))

    # Common cross-street patterns
    for form_index, pattern in enumerate(self._forms):
        hit = re.search(pattern, cleaned)
        if not hit:
            continue

        self._stats[str(form_index + 1)] += 1

        # The first form captures a street plus two cross streets; every
        # other form captures a single pair of crossing streets.
        if form_index == 0:
            return coders.locatable.fromStreetAndCrosses(
                hit.group(1), [hit.group(2), hit.group(3)])
        return coders.locatable.fromCross(
            hit.group(1), hit.group(2),
            source='%s (form %d)' % (hit.group(0), form_index))

    # No dice.
    return None
def printRecordsText(located_recs):
    """Print one tab-separated line per record to stdout.

    Columns, in order: photo id, cleaned date, folder, cleaned title,
    preferred URL, coder (or 'failed'), lat/lon, locatable string. The last
    two columns are 'n/a' when the record has no locatable.

    Args:
        located_recs: iterable of (record, coder, locatable) triples.
    """
    for r, coder, locatable in located_recs:
        when = record.CleanDate(r.date())
        heading = record.CleanTitle(r.title())

        # Only a truthy folder is cleaned; a falsy one is passed through.
        raw_folder = r.location()
        folder = record.CleanFolder(raw_folder) if raw_folder else raw_folder

        if locatable:
            # str() never yields a falsy non-empty alternative, so no
            # fallback value is needed for the lat/lon column.
            where = '\t'.join([str(locatable.getLatLon()), str(locatable)])
        else:
            where = 'n/a\tn/a'

        columns = [
            r.photo_id(),
            when,
            folder,
            heading,
            r.preferred_url,
            coder or 'failed',
            where,
        ]
        print('\t'.join(columns))
def printRecordsText(located_recs):
    """Print one tab-separated line per record to stdout.

    Columns, in order: photo id, cleaned date, folder, cleaned title,
    preferred URL, coder (or 'failed'), "(lat, lon)" tuple text, address.
    The last two columns are 'n/a' when the record has no location data.

    Args:
        located_recs: iterable of (record, coder, location_data) triples,
            where a truthy location_data is indexed by 'lat', 'lon' and
            'address'.
    """
    for r, coder, location_data in located_recs:
        when = record.CleanDate(r.date())
        heading = record.CleanTitle(r.title())

        # Only a truthy folder is cleaned; a falsy one is passed through.
        raw_folder = r.location()
        folder = record.CleanFolder(raw_folder) if raw_folder else raw_folder

        if location_data:
            point = (location_data['lat'], location_data['lon'])
            # str() of a tuple is never empty, so no fallback is needed.
            where = '\t'.join([str(point), location_data['address']])
        else:
            where = 'n/a\tn/a'

        columns = [
            r.photo_id(),
            when,
            folder,
            heading,
            r.preferred_url,
            coder or 'failed',
            where,
        ]
        print('\t'.join(columns))
def codeRecord(self, r):
    """Locate a record filed under an 'S.F. Streets-' folder.

    The record's location string is normalised so that the 1906 earthquake
    streets folder and the 'Sheet:' spelling both read as
    'Folder: S.F. Streets'. Records outside 'Folder: S.F. Streets-' are
    ignored. The street category taken from the folder is then combined with
    whatever self.extract_matches() finds in the lower-cased, cleaned title.

    Returns:
        A Locatable found directly among the matches if there is one,
        otherwise the result of fromStreetAndCrosses(street, matches);
        None when the folder, street category or matches are missing.
    """
    folder = r.location()
    for alias in ('Folder: S.F. Earthquakes-1906-Streets',
                  'Sheet: S.F. Streets'):
        folder = folder.replace(alias, 'Folder: S.F. Streets')

    if not folder.startswith("Folder: S.F. Streets-"):
        return None

    street = get_street_cat(folder)
    if not street:
        return None
    street = clean_street_cat(street.lower())

    title = record.CleanTitle(r.title()).lower()
    matches = self.extract_matches(title, street)
    if not matches:
        return None

    # matches is a mix of locatables and cross-street strings.
    # locatables take precedence, since they're more precise.
    precise = next(
        (candidate for candidate in matches
         if type(candidate) == coders.locatable.Locatable),
        None)
    if precise is not None:
        return precise

    # We've got a street and cross-streets
    assert None not in matches, '%s: %s' % (r.photo_id(), title)
    return coders.locatable.fromStreetAndCrosses(street, matches)
def printRecordsJson(located_recs):
    """Print all records to stdout as a single indented JSON array.

    Folder, title, description and the located address are passed through
    removeNonAscii() before being stored. Each record is serialised on its
    own first, so that a record which cannot be encoded is written to stderr
    before the error propagates.

    Args:
        located_recs: iterable of (record, coder, location_data) triples.
            When coder is truthy, location_data is indexed by 'lat', 'lon'
            and 'address'; otherwise it is not touched.

    Raises:
        Whatever json.dumps() raises for a record it cannot serialise; the
        exception is re-raised unchanged with its original traceback.
    """
    def iso_day(d):
        # Zero-padded YYYY-MM-DD built from the year/month/day attributes.
        return '%04d-%02d-%02d' % (d.year, d.month, d.day)

    recs = []
    for r, coder, location_data in located_recs:
        extracted = {'date_range': [None, None]}
        rec = {
            'id': r.photo_id(),
            'folder': removeNonAscii(r.location().replace('Folder: ', '')),
            'date': record.CleanDate(r.date()),
            'title': removeNonAscii(record.CleanTitle(r.title())),
            'description': removeNonAscii(r.description()),
            'url': r.preferred_url,
            'extracted': extracted,
        }

        # The note is optional; the key is omitted when it is falsy.
        if r.note():
            rec['note'] = r.note()

        start, end = r.date_range()
        extracted['date_range'] = [iso_day(start), iso_day(end)]

        # Location details are only emitted when a coder claimed the record.
        if coder:
            extracted['latlon'] = (location_data['lat'], location_data['lon'])
            extracted['located_str'] = removeNonAscii(
                location_data['address'])
            extracted['technique'] = coder

        # Fail early, and say which record is at fault, instead of failing
        # inside the final dump of the whole list.
        try:
            json.dumps(rec)
        except Exception:
            sys.stderr.write('%s\n' % rec)
            # Bare raise keeps the original traceback ("raise e" drops it
            # under Python 2).
            raise

        recs.append(rec)

    print(json.dumps(recs, indent=2))
#!/usr/bin/python
"""Dump photo id, date, folder, title and URL of every record to entries.csv."""
import sys

# Make the parent of the script directory importable. This must be a single
# list entry: "sys.path += <string>" would extend the list with every
# individual character of the string instead.
sys.path.append(sys.path[0] + '/..')

import csv
import record

rs = record.AllRecords()

# Binary mode is what the Python 2 csv module expects for its output file.
# The with-block makes sure the file is flushed and closed when done.
with open('entries.csv', 'wb') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['photo_id', 'date', 'folder', 'title', 'library_url'])

    for r in rs:
        date = record.CleanDate(r.date())
        title = record.CleanTitle(r.title())
        folder = record.CleanFolder(r.location())
        csv_writer.writerow([r.photo_id(), date, folder, title, r.preferred_url])
continue print folder print ' Located:' located = set() for id, latlon, locatable_str in ccs: print ' %s (%s)' % (locatable_str, latlon) located.add(id) print ' Others: %d' % (len(recs) - len(ccs)) dated_rs = [(r.date(), r) for r in recs] for date, r in sorted(dated_rs): c = ' ' if r.photo_id() in located: c = '*' print ' %s%s %15s %s %s' % ( c, r.photo_id(), record.CleanDate( r.date()), record.CleanTitle(r.title()), r.preferred_url) print '' response = raw_input('generalize? (y or n or photo_id): ') if response in ['y', 'n', 'yes', 'no'] or '-' in response: file('generalizations.txt', 'a').write('%s:%s\n' % (folder, response)) else: print '(Skipping)' print '' print '' print '' sys.stderr.write('Saveable records: %d\n' % saved) sys.stderr.write('Saved: %d\n' % len(generalizations))