def load_midb(src):
    """Load rows from a CSV source into the ``model.MidB`` table.

    Field names are converted with ``utils.camel2under``.  For every row:

    * ``spell_num`` is the running count of rows seen so far with the same
      ``(disp_num, ccode)`` pair;
    * ``st_date_min``/``st_date_max`` and ``end_date_min``/``end_date_max``
      are filled from the pair returned by ``utils.daterng`` for the
      year/month/day columns;
    * every column whose SQLAlchemy type is ``Integer`` is passed through
      the module-level ``_int`` helper (defined outside this function).

    :param src: source handed to ``csv2.DictReader`` (read as latin1).
    """
    session = model.SESSION()
    reader = csv2.DictReader(src, encoding='latin1')
    reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames]
    table_columns = model.MidB.__table__.columns
    cols = [c.name for c in table_columns]
    int_cols = [c.name for c in table_columns
                if isinstance(c.type, sa.types.Integer)]
    cnt = collections.Counter()
    for row in reader:
        key = (row['disp_num'], row['ccode'])
        cnt[key] += 1
        row['spell_num'] = cnt[key]
        # daterng yields a (min, max) pair; the original code assigned both
        # halves to the *_min key, so the max overwrote the min and the
        # *_max key was never populated.
        row['st_date_min'], row['st_date_max'] = utils.daterng(
            *(_int(row[k]) for k in ('st_year', 'st_mon', 'st_day')))
        row['end_date_min'], row['end_date_max'] = utils.daterng(
            *(_int(row[k]) for k in ('end_year', 'end_mon', 'end_day')))
        ## set -9 to NULL
        for k in int_cols:
            row[k] = _int(row[k])
        session.add(model.MidB(**utils.subset(row, cols)))
    session.commit()
def load_polity_states(src):
    """Load Polity states and their system-membership intervals from YAML.

    A ``model.PolityState`` is added the first time a ``ccode`` is seen;
    a ``model.PolitySysMembership`` is added for every entry, numbered by
    how many entries with that ``ccode`` have been seen so far.  A falsy
    ``end_year`` is replaced with ``model.PolitySysMembership.ONGOING``.

    :param src: YAML source parsed with ``yaml.load``.
    """
    session = model.SESSION()
    # NOTE(review): yaml.load can construct arbitrary objects; only feed
    # this trusted data files.
    entries = yaml.load(src)
    seen = collections.Counter()
    state_fields = ('ccode', 'scode', 'country')
    membership_fields = ('ccode', 'start_year', 'end_year')
    for entry in entries:
        ccode = entry['ccode']
        seen[ccode] += 1
        interval = seen[ccode]
        if interval == 1:
            # first appearance of this country code: register the state
            session.add(model.PolityState(**utils.subset(entry, state_fields)))
        if not entry['end_year']:
            entry['end_year'] = model.PolitySysMembership.ONGOING
        membership = utils.subset(entry, membership_fields)
        membership['interval'] = interval
        session.add(model.PolitySysMembership(**membership))
    session.commit()
def load_polity_states(src):
    """Load Polity system-membership intervals from a YAML source.

    One ``model.PolitySysMembership`` is added per entry.  ``interval`` is
    the running count of entries seen for that ``ccode``; a falsy
    ``end_year`` is replaced with ``model.PolitySysMembership.ONGOING``.

    :param src: YAML source parsed with ``yaml.load``.
    """
    POLITY_MAX_YEAR = model.PolitySysMembership.ONGOING
    # The original passed ('ccode', '') here, so start_year and the
    # normalised end_year never reached the model.
    cols = ('ccode', 'start_year', 'end_year')
    session = model.SESSION()
    # NOTE(review): yaml.load can construct arbitrary objects; only feed
    # this trusted data files.
    data = yaml.load(src)
    cnt = collections.Counter()
    for x in data:
        ccode = x['ccode']
        cnt[ccode] += 1
        if not x['end_year']:
            x['end_year'] = POLITY_MAX_YEAR
        session.add(model.PolitySysMembership(interval=cnt[ccode],
                                              **utils.subset(x, cols)))
    session.commit()
def load_ksgp4use(src):
    """ Load data for table ksgp4use

    Reads a space-delimited source, lower-cases the field names, turns
    every '.' value into None and adds one ``model.KsgP4use`` per row.
    """
    session = model.SESSION()
    reader = csv2.DictReader(src, delimiter=' ')
    reader.fieldnames = [name.lower() for name in reader.fieldnames]
    table_cols = [col.name for col in model.KsgP4use.__table__.c]
    for record in reader:
        # collect the '.' placeholders first, then blank them out
        dotted = [key for key, value in record.iteritems() if value == '.']
        for key in dotted:
            record[key] = None
        session.add(model.KsgP4use(**utils.subset(record, table_cols)))
    session.commit()
def load_ksgp4use(src):
    """ Load data for table ksgp4use

    The source is parsed as space-delimited text with lower-cased field
    names; a value of '.' is stored as None.  One ``model.KsgP4use`` is
    added per row and the session is committed at the end.
    """
    session = model.SESSION()
    reader = csv2.DictReader(src, delimiter=' ')
    reader.fieldnames = [field.lower() for field in reader.fieldnames]
    wanted = [column.name for column in model.KsgP4use.__table__.c]
    for entry in reader:
        placeholders = [name for name, cell in entry.iteritems() if cell == '.']
        for name in placeholders:
            entry[name] = None
        obj = model.KsgP4use(**utils.subset(entry, wanted))
        session.add(obj)
    session.commit()
def load_war3_partic(src):
    """ Load COW War Data v. 3, Participants

    Adds one ``model.War3Partic`` per row and up to two
    ``model.War3ParticDate`` rows (spells 1 and 2) when the spell's
    begin-year column is non-empty.  Values equal to "-999" or "-888" are
    passed through ``utils.replmiss`` before the objects are built.

    :param src: source handed to ``csv2.DictReader`` (read as latin1).
    """
    session = model.SESSION()

    def _int(x):
        """Convert to int; None and blank cells yield None."""
        if x == '':
            # int('') raises ValueError, which the original did not catch
            return None
        try:
            return int(x)
        except TypeError:
            return None

    def _daterng(row, which, n):
        """Date range for the 'beg' or 'end' columns of spell *n*."""
        return utils.daterng(*[_int(row['%s_%s%d' % (part, which, n)])
                               for part in ('yr', 'mon', 'day')])

    def _dates(row, n):
        """Add the War3ParticDate for spell *n* if the row has one."""
        if not row['yr_beg%d' % n]:
            return
        y = model.War3ParticDate()
        y.war_partic = row['war_partic']
        y.spell_no = n
        y.date_beg_min, y.date_beg_max = _daterng(row, 'beg', n)
        y.date_end_min, y.date_end_max = _daterng(row, 'end', n)
        session.add(y)

    reader = csv2.DictReader(src, encoding='latin1')
    reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames]
    war_cols = [x.name for x in model.War3Partic.__table__.c]
    cnt = collections.Counter()
    for row in reader:
        ## Account for multiple country-war participations
        key = (row['war_no'], row['state_num'])
        cnt[key] += 1
        row['partic_no'] = cnt[key]
        row['war_partic'] = war_partic_pkey(row['war_no'],
                                            row['state_num'],
                                            row['partic_no'])
        ## replace missing values
        # iterate over a snapshot of the keys while values are reassigned
        for k in list(row):
            row[k] = utils.replmiss(row[k], lambda x: x in ("-999", "-888"))
        session.add(model.War3Partic(**utils.subset(row, war_cols)))
        ## Dates
        for i in (1, 2):
            _dates(row, i)
    session.commit()
def load_war3_partic(src):
    """ Load COW War Data v. 3, Participants

    Adds one ``model.War3Partic`` per row and up to two
    ``model.War3ParticDate`` rows (spells 1 and 2) when the spell's
    begin-year column is non-empty.  Values equal to "-999" or "-888" are
    passed through ``utils.replmiss`` before the objects are built.

    :param src: source handed to ``csv2.DictReader`` (read as latin1).
    """
    session = model.SESSION()

    def _int(x):
        """Convert to int; None and blank cells yield None."""
        if x == '':
            # int('') raises ValueError, which the original did not catch
            return None
        try:
            return int(x)
        except TypeError:
            return None

    def _daterng(row, which, n):
        """Date range for the 'beg' or 'end' columns of spell *n*."""
        return utils.daterng(*[_int(row['%s_%s%d' % (part, which, n)])
                               for part in ('yr', 'mon', 'day')])

    def _dates(row, n):
        """Add the War3ParticDate for spell *n* if the row has one."""
        if not row['yr_beg%d' % n]:
            return
        y = model.War3ParticDate()
        y.war_partic = row['war_partic']
        y.spell_no = n
        y.date_beg_min, y.date_beg_max = _daterng(row, 'beg', n)
        y.date_end_min, y.date_end_max = _daterng(row, 'end', n)
        session.add(y)

    reader = csv2.DictReader(src, encoding='latin1')
    reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames]
    war_cols = [x.name for x in model.War3Partic.__table__.c]
    cnt = collections.Counter()
    for row in reader:
        ## Account for multiple country-war participations
        key = (row['war_no'], row['state_num'])
        cnt[key] += 1
        row['partic_no'] = cnt[key]
        row['war_partic'] = war_partic_pkey(row['war_no'],
                                            row['state_num'],
                                            row['partic_no'])
        ## replace missing values
        # iterate over a snapshot of the keys while values are reassigned
        for k in list(row):
            row[k] = utils.replmiss(row[k], lambda x: x in ("-999", "-888"))
        session.add(model.War3Partic(**utils.subset(row, war_cols)))
        ## Dates
        for i in (1, 2):
            _dates(row, i)
    session.commit()
def load_war3(src):
    """ Load COW War Data v. 3

    Adds one ``model.War3`` per row and up to two ``model.War3Date`` rows
    (spells 1 and 2) when the spell's begin-year column is non-empty.
    Values equal to "-999" or "-888" are passed through
    ``utils.replmiss``; a missing ``war_type`` column defaults to 1 and a
    falsy ``oceania`` value is stored as False.

    :param src: source handed to ``csv2.DictReader`` (read as latin1).
    """
    session = model.SESSION()

    def _int(x):
        """Convert to int; None and blank cells yield None."""
        if x == '':
            # int('') raises ValueError, which the original did not catch
            return None
        try:
            return int(x)
        except TypeError:
            return None

    def _daterng(row, which, n):
        """Date range for the 'beg' or 'end' columns of spell *n*."""
        return utils.daterng(*[_int(row['%s_%s%d' % (part, which, n)])
                               for part in ('yr', 'mon', 'day')])

    def _dates(row, n):
        """Add the War3Date for spell *n* if the row has one."""
        if not row['yr_beg%d' % n]:
            return
        y = model.War3Date()
        y.war_no = row['war_no']
        y.spell_no = n
        y.date_beg_min, y.date_beg_max = _daterng(row, 'beg', n)
        y.date_end_min, y.date_end_max = _daterng(row, 'end', n)
        session.add(y)

    reader = csv2.DictReader(src, encoding='latin1')
    reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames]
    war_cols = [x.name for x in model.War3.__table__.c]
    for row in reader:
        # iterate over a snapshot of the keys while values are reassigned
        for k in list(row):
            row[k] = utils.replmiss(row[k], lambda x: x in ("-999", "-888"))
        ## Inter-state war does not have a war_type
        if 'war_type' not in row:
            row['war_type'] = 1
        row['oceania'] = row['oceania'] or False
        session.add(model.War3(**utils.subset(row, war_cols)))
        ## Dates
        for i in (1, 2):
            _dates(row, i)
    session.commit()
def load_war3(src):
    """ Load COW War Data v. 3

    Adds one ``model.War3`` per row and up to two ``model.War3Date`` rows
    (spells 1 and 2) when the spell's begin-year column is non-empty.
    Values equal to "-999" or "-888" are passed through
    ``utils.replmiss``; a missing ``war_type`` column defaults to 1 and a
    falsy ``oceania`` value is stored as False.

    :param src: source handed to ``csv2.DictReader`` (read as latin1).
    """
    session = model.SESSION()

    def _int(x):
        """Convert to int; None and blank cells yield None."""
        if x == '':
            # int('') raises ValueError, which the original did not catch
            return None
        try:
            return int(x)
        except TypeError:
            return None

    def _daterng(row, which, n):
        """Date range for the 'beg' or 'end' columns of spell *n*."""
        return utils.daterng(*[_int(row['%s_%s%d' % (part, which, n)])
                               for part in ('yr', 'mon', 'day')])

    def _dates(row, n):
        """Add the War3Date for spell *n* if the row has one."""
        if not row['yr_beg%d' % n]:
            return
        y = model.War3Date()
        y.war_no = row['war_no']
        y.spell_no = n
        y.date_beg_min, y.date_beg_max = _daterng(row, 'beg', n)
        y.date_end_min, y.date_end_max = _daterng(row, 'end', n)
        session.add(y)

    reader = csv2.DictReader(src, encoding='latin1')
    reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames]
    war_cols = [x.name for x in model.War3.__table__.c]
    for row in reader:
        # iterate over a snapshot of the keys while values are reassigned
        for k in list(row):
            row[k] = utils.replmiss(row[k], lambda x: x in ("-999", "-888"))
        ## Inter-state war does not have a war_type
        if 'war_type' not in row:
            row['war_type'] = 1
        row['oceania'] = row['oceania'] or False
        session.add(model.War3(**utils.subset(row, war_cols)))
        ## Dates
        for i in (1, 2):
            _dates(row, i)
    session.commit()
def load_midi(src):
    """Load rows from a CSV source into the ``model.MidI`` table.

    Field names are converted with ``utils.camel2under``.  For every row
    the ``st_date_min``/``st_date_max`` and ``end_date_min``/
    ``end_date_max`` keys are filled from the pair returned by
    ``utils.daterng`` for the year/month/day columns, and every column
    whose SQLAlchemy type is ``Integer`` is passed through the
    module-level ``_int`` helper (defined outside this function).

    :param src: source handed to ``csv2.DictReader`` (read as latin1).
    """
    session = model.SESSION()
    reader = csv2.DictReader(src, encoding='latin1')
    reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames]
    table_columns = model.MidI.__table__.columns
    cols = [c.name for c in table_columns]
    int_cols = [c.name for c in table_columns
                if isinstance(c.type, sa.types.Integer)]
    for row in reader:
        # daterng yields a (min, max) pair; the original code assigned both
        # halves to the *_min key, so the max overwrote the min and the
        # *_max key was never populated.
        row['st_date_min'], row['st_date_max'] = utils.daterng(
            *(_int(row[k]) for k in ('st_year', 'st_mon', 'st_day')))
        row['end_date_min'], row['end_date_max'] = utils.daterng(
            *(_int(row[k]) for k in ('end_year', 'end_mon', 'end_day')))
        ## set -9 to NULL
        for k in int_cols:
            row[k] = _int(row[k])
        session.add(model.MidI(**utils.subset(row, cols)))
    session.commit()
def load_polityd(src):
    """Load Polity case rows from a spreadsheet source into ``model.PolityCase``.

    For each row: ``pcase`` is the running count of rows seen for that
    ``ccode``; ``present`` becomes a boolean (True when the cell is '1');
    day/month values of 99 and year values of 9999 are passed through
    ``utils.replmiss``; ``bdate``/``edate`` are built with
    ``utils.row_ymd`` when the corresponding year is truthy.

    :param src: source handed to ``xls.DictReader``.
    """
    def _is_99(value):
        return int(value) == 99

    def _is_9999(value):
        return int(value) == 9999

    session = model.SESSION()
    reader = xls.DictReader(src)
    columns = [col.name for col in model.PolityCase.__table__.c]
    cases_seen = collections.Counter()
    # (column suffix, predicate marking the missing-value code)
    date_parts = (('day', _is_99), ('month', _is_99), ('year', _is_9999))
    for row in reader:
        ccode = row['ccode']
        cases_seen[ccode] += 1
        row['pcase'] = cases_seen[ccode]
        row['present'] = (row['present'] == '1')
        for prefix in ('e', 'b'):
            for suffix, is_missing in date_parts:
                field = prefix + suffix
                row[field] = utils.replmiss(row[field], is_missing)
        if row['byear']:
            row['bdate'] = utils.row_ymd(row, 'byear', 'bmonth', 'bday')
        if row['eyear']:
            row['edate'] = utils.row_ymd(row, 'eyear', 'emonth', 'eday')
        session.add(model.PolityCase(**utils.subset(row, columns)))
    session.commit()
def load_contdir(src):
    """ Load direct contiguity data from csv file

    ``begin`` and ``end`` are read as a 4-digit year followed by the
    month.  ``start_date`` is the first day of the begin month and
    ``end_date`` the last day of the end month.
    """
    session = model.SESSION()
    reader = csv2.DictReader(src, encoding='latin1')
    cols = [col.name for col in model.ContDir.__table__.c]
    one_day = datetime.timedelta(days=1)
    for row in reader:
        first_day = datetime.date(int(row['begin'][:4]),
                                  int(row['begin'][4:]), 1)
        last_month = datetime.date(int(row['end'][:4]),
                                   int(row['end'][4:]), 1)
        # first day of the following month (December rolls into January
        # of the next year), then step back one day
        following = datetime.date(last_month.year + last_month.month // 12,
                                  last_month.month % 12 + 1, 1)
        row['end_date'] = following - one_day
        row['start_date'] = first_day
        session.add(model.ContDir(**utils.subset(row, cols)))
    session.commit()
def load_contdir(src):
    """ Load direct contiguity data from csv file

    Each row's ``begin``/``end`` value is split into a 4-digit year and a
    month; the row gets ``start_date`` (day 1 of the begin month) and
    ``end_date`` (final day of the end month) before being stored as a
    ``model.ContDir``.
    """
    session = model.SESSION()
    reader = csv2.DictReader(src, encoding='latin1')
    wanted = [column.name for column in model.ContDir.__table__.c]
    for record in reader:
        opening = datetime.date(int(record['begin'][:4]),
                                int(record['begin'][4:]), 1)
        closing_month = datetime.date(int(record['end'][:4]),
                                      int(record['end'][4:]), 1)
        wraps_year = closing_month.month == 12
        next_year = closing_month.year + 1 if wraps_year else closing_month.year
        next_month = 1 if wraps_year else closing_month.month + 1
        # last day of the closing month = day before the 1st of the next
        closing = datetime.date(next_year, next_month, 1) - datetime.timedelta(days=1)
        record['end_date'] = closing
        record['start_date'] = opening
        session.add(model.ContDir(**utils.subset(record, wanted)))
    session.commit()
def load_war4_inter(src):
    """ Add Inter-state war data to war4_* tables

    updates tables cow_war4, cow_belligerents, cow_war4_participation,
    cow_war4_partic_dates

    A War4 row and its two War4Side rows are added the first time a
    ``war_num`` is seen; a War4Belligerent the first time a belligerent
    key is seen.  Every input row adds one War4Partic and up to two
    War4ParticDate rows.
    """
    session = model.SESSION()

    def _int(x):
        # negative codes are stored as None
        value = int(x)
        if value < 0:
            return None
        return value

    def _ymd_range(row, prefix, n):
        # date range from the <prefix>_year/month/day columns of spell n
        return utils.daterng(_int(row['%s_year%d' % (prefix, n)]),
                             _int(row['%s_month%d' % (prefix, n)]),
                             _int(row['%s_day%d' % (prefix, n)]))

    def partic(row):
        obj = model.War4Partic()
        bellig_key = belligerent_key(row['ccode'], row['state_name'])
        side_key = war_side_pkey(int(row['war_num']), int(row['side']))
        obj.war_partic = war_partic_pkey(side_key, bellig_key)
        obj.war_side = side_key
        obj.belligerent = bellig_key
        where = WHERE_FOUGHT[_int(row['where_fought'])]
        for attr, flag in where.iteritems():
            setattr(obj, attr, flag)
        obj.outcome = row['outcome']
        obj.bat_death = _int(row['bat_death'])
        obj.initiator = (int(row['initiator']) == 1)
        return obj

    def add_partic_dates(row, n):
        # '-8' in the start year means there is no spell n for this row
        if row['start_year%d' % n] == '-8':
            return
        obj = model.War4ParticDate()
        side_key = war_side_pkey(int(row['war_num']), int(row['side']))
        bellig_key = belligerent_key(row['ccode'], row['state_name'])
        obj.war_partic = war_partic_pkey(side_key, bellig_key)
        obj.partic_num = n
        obj.start_date_min, obj.start_date_max = _ymd_range(row, 'start', n)
        still_ongoing = row['end_year%d' % n] == "-7"
        if still_ongoing:
            obj.end_date_min = obj.end_date_max = model.War4.ONGOING_DATE
        else:
            obj.end_date_min, obj.end_date_max = _ymd_range(row, 'end', n)
        obj.ongoing = still_ongoing
        session.add(obj)

    war_fields = ("war_num", "war_name", "war_type")
    wars_seen = set()
    belligerents_seen = set()
    reader = csv2.DictReader(src)
    reader.fieldnames = [utils.camel2under(name) for name in reader.fieldnames]
    for row in reader:
        war_num = row['war_num']
        belligerent = belligerent_key(row['ccode'], row['state_name'])
        new_war = war_num not in wars_seen
        new_belligerent = belligerent not in belligerents_seen
        wars_seen.add(war_num)
        belligerents_seen.add(belligerent)
        if new_war:
            session.add(model.War4(intnl=True, **utils.subset(row, war_fields)))
            for side in (1, 2):
                side_key = war_side_pkey(side=side, war_num=war_num)
                session.add(model.War4Side(war_side=side_key,
                                           side=side,
                                           war_num=war_num))
            session.flush()
        if new_belligerent:
            session.add(model.War4Belligerent(belligerent=belligerent,
                                              belligerent_name=row['state_name'],
                                              ccode=row['ccode']))
            session.flush()
        session.add(partic(row))
        for spell in (1, 2):
            add_partic_dates(row, spell)
    session.commit()
def load_war4_inter(src):
    """ Add Inter-state war data to war4_* tables

    updates tables cow_war4, cow_belligerents, cow_war4_participation,
    cow_war4_partic_dates

    A War4 row and its two War4Side rows are added the first time a
    ``war_num`` is seen; a War4Belligerent the first time a belligerent
    key is seen.  Every input row adds one War4Partic and up to two
    War4ParticDate rows.
    """
    session = model.SESSION()

    def _int(x):
        # negative codes are stored as None
        value = int(x)
        if value < 0:
            return None
        return value

    def _ymd_range(row, prefix, n):
        # date range from the <prefix>_year/month/day columns of spell n
        return utils.daterng(_int(row['%s_year%d' % (prefix, n)]),
                             _int(row['%s_month%d' % (prefix, n)]),
                             _int(row['%s_day%d' % (prefix, n)]))

    def partic(row):
        obj = model.War4Partic()
        bellig_key = belligerent_key(row['ccode'], row['state_name'])
        side_key = war_side_pkey(int(row['war_num']), int(row['side']))
        obj.war_partic = war_partic_pkey(side_key, bellig_key)
        obj.war_side = side_key
        obj.belligerent = bellig_key
        where = WHERE_FOUGHT[_int(row['where_fought'])]
        for attr, flag in where.iteritems():
            setattr(obj, attr, flag)
        obj.outcome = row['outcome']
        obj.bat_death = _int(row['bat_death'])
        obj.initiator = (int(row['initiator']) == 1)
        return obj

    def add_partic_dates(row, n):
        # '-8' in the start year means there is no spell n for this row
        if row['start_year%d' % n] == '-8':
            return
        obj = model.War4ParticDate()
        side_key = war_side_pkey(int(row['war_num']), int(row['side']))
        bellig_key = belligerent_key(row['ccode'], row['state_name'])
        obj.war_partic = war_partic_pkey(side_key, bellig_key)
        obj.partic_num = n
        obj.start_date_min, obj.start_date_max = _ymd_range(row, 'start', n)
        still_ongoing = row['end_year%d' % n] == "-7"
        if still_ongoing:
            obj.end_date_min = obj.end_date_max = model.War4.ONGOING_DATE
        else:
            obj.end_date_min, obj.end_date_max = _ymd_range(row, 'end', n)
        obj.ongoing = still_ongoing
        session.add(obj)

    war_fields = ("war_num", "war_name", "war_type")
    wars_seen = set()
    belligerents_seen = set()
    reader = csv2.DictReader(src)
    reader.fieldnames = [utils.camel2under(name) for name in reader.fieldnames]
    for row in reader:
        war_num = row['war_num']
        belligerent = belligerent_key(row['ccode'], row['state_name'])
        new_war = war_num not in wars_seen
        new_belligerent = belligerent not in belligerents_seen
        wars_seen.add(war_num)
        belligerents_seen.add(belligerent)
        if new_war:
            session.add(model.War4(intnl=True, **utils.subset(row, war_fields)))
            for side in (1, 2):
                side_key = war_side_pkey(side=side, war_num=war_num)
                session.add(model.War4Side(war_side=side_key,
                                           side=side,
                                           war_num=war_num))
            session.flush()
        if new_belligerent:
            session.add(model.War4Belligerent(belligerent=belligerent,
                                              belligerent_name=row['state_name'],
                                              ccode=row['ccode']))
            session.flush()
        session.add(partic(row))
        for spell in (1, 2):
            add_partic_dates(row, spell)
    session.commit()