def load_war4_links(inter, intra, nonstate): session = model.SESSION() def _int(x): y = int(x) return y if y > 0 else None def clean(x): return [_int(y.strip()) for y in x.split(',')] def load_link(war_from, war_to): if war_from and war_to: q = session.query(model.War4Link).\ filter(model.War4Link.war_from == war_from).\ filter(model.War4Link.war_to == war_to) if q.count() == 0: session.add(model.War4Link(war_from=war_from, war_to=war_to)) def load_file(src): reader = csv2.DictReader(src, encoding='latin-1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] for row in reader: war_num = int(row['war_num']) for x in clean(row['trans_from']): load_link(x, war_num) for x in clean(row['trans_to']): load_link(war_num, x) load_file(inter) load_file(intra) load_file(nonstate) session.commit()
def load_ksg2cowyear(): """ Load data into ksg_to_cow_year all data derived from ksg_to_cow table """ session = model.SESSION() for x in session.query(model.KsgToCow): start_date = x.start_date end_date = x.end_date yr = start_date.year while yr <= end_date.year: yrdays = 366 if calendar.isleap(yr) else 365 start_year = start_date <= datetime.date(yr, 1, 1) end_year = end_date >= datetime.date(yr, 12, 31) mid_year = (end_date >= datetime.date(yr, 6, 30) and start_date <= datetime.date(yr, 6, 30)) frac_year = ((min(datetime.date(yr, 12, 31), end_date) - max(datetime.date(yr, 1, 1), start_date)).days + 1.0) / yrdays session.add(model.KsgToCowYear(cow_ccode = x.cow_ccode, ksg_ccode = x.ksg_ccode, year = yr, start_year = start_year, end_year = end_year, mid_year = mid_year, frac_year = frac_year)) yr += 1 session.flush() session.commit()
def load_nmc(src): def missing(x): return x if x != '-9' else None def _int(x): try: return int(x) except TypeError: return None def _float(x): try: return float(x) except TypeError: return None session = model.SESSION() # session = model.SESSION() reader = csv2.DictReader(src, encoding='latin-1') for i, row in enumerate(reader): del row['stateabb'] del row['statenme'] del row['upopanomalycode'] del row['version'] for k in ['ccode', 'irst', 'milex', 'milper', 'tpop', 'upop']: row[k] = _int(row[k]) for k in ["pec", 'upopgrowth', 'cinc']: v = _float(row[k]) if v < 0: v = None row[k] = v session.add(model.Nmc(**row)) session.commit()
def load_midb(src): session = model.SESSION() reader = csv2.DictReader(src, encoding='latin1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] cols = [c.name for c in model.MidB.__table__.columns] int_cols = [ c.name for c in model.MidB.__table__.columns if isinstance(c.type, sa.types.Integer) ] cnt = collections.Counter() for row in reader: cnt[(row['disp_num'], row['ccode'])] += 1 row['spell_num'] = cnt[(row['disp_num'], row['ccode'])] start_date = utils.daterng(*(_int(row[k]) for k in ('st_year', 'st_mon', 'st_day'))) row['st_date_min'], row['st_date_min'] = start_date end_date = utils.daterng(*(_int(row[k]) for k in ('end_year', 'end_mon', 'end_day'))) row['end_date_min'], row['end_date_min'] = end_date ## set -9 to NULL for k in int_cols: row[k] = _int(row[k]) session.add(model.MidB(**utils.subset(row, cols))) session.commit()
def load_polity(src): session = model.SESSION() reader = xls.DictReader(src) for row in reader: for k in ('scode', 'country'): del row[k] session.add(model.PolityStateYear(**row)) session.commit()
def load_ksg_system(): """ Create cow_system table """ session = model.SESSION() for st in session.query(model.KsgSysMembership): for yr in range(st.start_date.year, st.end_date.year + 1): session.add(model.KsgSystem(ccode = st.ccode, year = yr)) session.flush() session.commit()
def load_nmc_codes(src): session = model.SESSION() for tbl, v in yaml.load(src).iteritems(): data = [] for value, label in v.iteritems(): data.append({'value': unicode(value), 'label': unicode(label)}) model.Base.metadata.tables[tbl].insert().execute(data) session.commit()
def load_enum_from_yaml(src): """ Load dict of dicts into tables with key/value combinations """ session = model.SESSION() for tbl, v in yaml.load(src).iteritems(): data = [] for value, label in v.iteritems(): data.append({'value': unicode(value), 'label': unicode(label)}) model.Base.metadata.tables[tbl].insert().execute(data) session.commit()
def load_ksgp4use(src): """ Load data for table ksgp4use """ session = model.SESSION() reader = csv2.DictReader(src, delimiter=' ') reader.fieldnames = [x.lower() for x in reader.fieldnames] cols = [x.name for x in model.KsgP4use.__table__.c] for row in reader: for k, v in row.iteritems(): if v == '.': row[k] = None session.add(model.KsgP4use(**utils.subset(row, cols))) session.commit()
def load_polity_states(src): POLITY_MAX_YEAR = model.PolitySysMembership.ONGOING session = model.SESSION() data = yaml.load(src) cnt = collections.Counter() for x in data: ccode = x['ccode'] cnt[ccode] += 1 x['end_year'] = x['end_year'] if x['end_year'] else POLITY_MAX_YEAR session.add(model.PolitySysMembership(interval = cnt[ccode], **utils.subset(x, ('ccode', '')))) session.commit()
def load_war4_list(src): """ Load war4_list """ session = model.SESSION() PAT = re.compile( r"\d{4} +(?P<name>.*) +(?P<type>Intra|Extra|Non|Inter)-State War +#(?P<warnum>\d+)" ) for line in src: m = PAT.match(line) if m: session.add( model.War4List(war_num=int(m.group('warnum')), war_name=unicode( m.group('name').strip(), 'utf-8'))) session.commit()
def load_polity_states(src): session = model.SESSION() data = yaml.load(src) cnt = collections.Counter() cols1 = ('ccode', 'scode', 'country') cols2 = ('ccode', 'start_year', 'end_year') for row in data: cnt[row['ccode']] += 1 if cnt[row['ccode']] == 1: data1 = utils.subset(row, cols1) session.add(model.PolityState(**data1)) if not row['end_year']: row['end_year'] = model.PolitySysMembership.ONGOING data2 = utils.subset(row, cols2) data2['interval'] = cnt[row['ccode']] session.add(model.PolitySysMembership(**data2)) session.commit()
def load_cow_majors(src): """ Load data into cow_majors """ session = model.SESSION() reader = csv2.DictReader(src) reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] cnt = collections.Counter() for row in reader: ccode = row['ccode'] cnt[row['ccode']] += 1 st_date = utils.row_ymd(row, 'st_year', 'st_month', 'st_day') end_date = utils.row_ymd(row, 'end_year', 'end_month', 'end_day') session.add( model.CowMajor(ccode=ccode, interval=cnt[ccode], st_date=st_date, end_date=end_date)) session.commit()
def load_war3_partic(src): """ Load COW War Data v. 3, Participants """ session = model.SESSION() def _int(x): try: return int(x) except TypeError: return None def _dates(row, n): if row['yr_beg%d' % n]: y = model.War3ParticDate() y.war_partic = row['war_partic'] y.spell_no = n date_beg = utils.daterng(_int(row['yr_beg%d' % n]), _int(row['mon_beg%d' % n]), _int(row['day_beg%d' % n])) y.date_beg_min, y.date_beg_max = date_beg date_end = utils.daterng(_int(row['yr_end%d' % n]), _int(row['mon_end%d' % n]), _int(row['day_end%d' % n])) y.date_end_min, y.date_end_max = date_end session.add(y) reader = csv2.DictReader(src, encoding='latin1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] war_cols = [x.name for x in model.War3Partic.__table__.c] war_date_cols = [x.name for x in model.War3ParticDate.__table__.c] cnt = collections.Counter() for row in reader: ## Account for multiple country-war participations key = (row['war_no'], row['state_num']) cnt[key] += 1 row['partic_no'] = cnt[key] row['war_partic'] = war_partic_pkey(row['war_no'], row['state_num'], row['partic_no']) ## replace missing values for k, v in row.iteritems(): row[k] = utils.replmiss(v, lambda x: x in ("-999", "-888")) session.add(model.War3Partic(**utils.subset(row, war_cols))) ## Dates for i in (1, 2): _dates(row, i) session.commit()
def load_mid_links(src): """ Load tables mid_link_mid and link_mid_war """ session = model.SESSION() reader = csv2.DictReader(src, encoding='latin1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] for row in reader: disp_num = row['disp_num'] for k in ('link%d' % i for i in range(1, 4)): link = row[k] if link == '0' or link is None: continue elif link[-1] == 'W': session.add( model.MidLinkWar(disp_num=disp_num, war_num=link[:-1])) else: session.add( model.MidLinkMid(disp_num_1=disp_num, disp_num_2=link)) session.commit()
def load_war3(src): """ Load COW War Data v. 3 """ session = model.SESSION() def _int(x): try: return int(x) except TypeError: return None def _dates(row, n): if row['yr_beg%d' % n]: y = model.War3Date() y.war_no = row['war_no'] y.spell_no = n date_beg = utils.daterng(_int(row['yr_beg%d' % n]), _int(row['mon_beg%d' % n]), _int(row['day_beg%d' % n])) y.date_beg_min, y.date_beg_max = date_beg date_end = utils.daterng(_int(row['yr_end%d' % n]), _int(row['mon_end%d' % n]), _int(row['day_end%d' % n])) y.date_end_min, y.date_end_max = date_end session.add(y) reader = csv2.DictReader(src, encoding='latin1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] war_cols = [x.name for x in model.War3.__table__.c] war_date_cols = [x.name for x in model.War3Date.__table__.c] for row in reader: for k, v in row.iteritems(): row[k] = utils.replmiss(v, lambda x: x in ("-999", "-888")) ## Inter-state war does not have a war_type if 'war_type' not in row.keys(): row['war_type'] = 1 row['oceania'] = row['oceania'] if row['oceania'] else False session.add(model.War3(**utils.subset(row, war_cols))) ## Dates for i in (1, 2): _dates(row, i) session.commit()
def load_polityd(src): session = model.SESSION() reader = xls.DictReader(src) columns = [x.name for x in model.PolityCase.__table__.c] cnt = collections.Counter() for row in reader: ccode = row['ccode'] cnt[ccode] += 1 row['pcase'] = cnt[ccode] row['present'] = row['present'] == '1' for i in ('e', 'b'): row['%sday' % i] = utils.replmiss(row['%sday' % i], lambda x: int(x) == 99) row['%smonth' % i] = utils.replmiss(row['%smonth' % i], lambda x: int(x) == 99) row['%syear' % i] = utils.replmiss(row['%syear' % i], lambda x: int(x) == 9999) if row['byear']: row['bdate'] = utils.row_ymd(row, 'byear', 'bmonth', 'bday') if row['eyear']: row['edate'] = utils.row_ymd(row, 'eyear', 'emonth', 'eday') session.add(model.PolityCase(**utils.subset(row, columns))) session.commit()
def load_contdir(src): """ Load direct contiguity data from csv file""" session = model.SESSION() reader = csv2.DictReader(src, encoding='latin1') cols = [x.name for x in model.ContDir.__table__.c] for row in reader: start_mon = datetime.date(int(row['begin'][:4]), int(row['begin'][4:]), 1) end_mon = datetime.date(int(row['end'][:4]), int(row['end'][4:]), 1) if end_mon.month == 12: end_mon = datetime.date(end_mon.year + 1, 1, 1) else: end_mon = datetime.date(end_mon.year, end_mon.month + 1, 1) end_mon += datetime.timedelta(days=-1) row['end_date'] = end_mon row['start_date'] = start_mon data = utils.subset(row, cols) session.add(model.ContDir(**data)) session.commit()
def load_cow_states(src): """ Load data into cow_statelist and cow_system_membership """ session = model.SESSION() reader = csv2.DictReader(src) reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] cnt = collections.Counter() for row in reader: ccode = row['ccode'] cnt[row['ccode']] += 1 if cnt[ccode] == 1: session.add( model.CowState(ccode=ccode, state_abb=row['state_abb'], state_nme=row['state_nme'])) st_date = utils.row_ymd(row, 'st_year', 'st_month', 'st_day') end_date = utils.row_ymd(row, 'end_year', 'end_month', 'end_day') session.add( model.CowSysMembership(ccode=ccode, interval=cnt[ccode], st_date=st_date, end_date=end_date)) session.commit()
def load_cow_system(): """ load data into cow_system """ session = model.SESSION() for st in session.query(model.CowSysMembership): for yr in range(st.st_date.year, st.end_date.year + 1): eoy = datetime.date(yr, 12, 31) boy = datetime.date(yr, 1, 1) moy = datetime.date(yr, 7, 2) start_year = (st.st_date <= boy and st.end_date >= boy) mid_year = (st.st_date <= moy and st.end_date >= moy) end_year = (st.st_date <= eoy and st.end_date >= eoy) ndays = 366 if calendar.isleap(yr) else 365 frac_year = (min(eoy, st.end_date) - max(boy, st.st_date)).days / float(ndays) session.add( model.CowSystem(ccode=st.ccode, year=yr, start_year=start_year, mid_year=mid_year, end_year=end_year, frac_year=frac_year)) session.flush() session.commit()
def _load_ksg_states(src, microstate): """ Create ksg_states """ session = model.SESSION() HEADER = ['idnum', 'idabb', 'country_name', 'start_date', 'end_date'] rowgen = csv2.DictReader(src, delimiter='\t', fieldnames = HEADER, encoding='latin-1') cnt = collections.Counter() for row in rowgen: idnum = row["idnum"] cnt[idnum] += 1 if cnt[idnum] == 1: session.add(model.KsgState(idnum = idnum, idabb = row["idabb"], country_name = row["country_name"], microstate = microstate)) interval = cnt[idnum] start_date = _iisystem_dates(row['start_date']) end_date = _iisystem_dates(row['end_date']) session.add(model.KsgSysMembership(ccode = idnum, start_date = start_date, end_date = end_date, interval = interval)) session.commit()
def load_war4_inter(src): """ Add Inter-state war data to war4_* tables updates tables cow_war4, cow_belligerents, cow_war4_participation, cow_war4_partic_dates """ session = model.SESSION() def _int(x): y = int(x) return y if y >= 0 else None def partic(row): y = model.War4Partic() belligerent = belligerent_key(row['ccode'], row['state_name']) war_side = war_side_pkey(int(row['war_num']), int(row['side'])) y.war_partic = war_partic_pkey(war_side, belligerent) y.war_side = war_side y.belligerent = belligerent for k, v in WHERE_FOUGHT[_int(row['where_fought'])].iteritems(): setattr(y, k, v) y.outcome = row['outcome'] y.bat_death = _int(row['bat_death']) y.initiator = (int(row['initiator']) == 1) return y def add_partic_dates(row, n): if row['start_year%d' % n] != '-8': y = model.War4ParticDate() war_side = war_side_pkey(int(row['war_num']), int(row['side'])) belligerent = belligerent_key(row['ccode'], row['state_name']) y.war_partic = war_partic_pkey(war_side, belligerent) y.partic_num = n start_date = utils.daterng(_int(row['start_year%d' % n]), _int(row['start_month%d' % n]), _int(row['start_day%d' % n])) y.start_date_min, y.start_date_max = start_date if row['end_year%d' % n] == "-7": y.end_date_min = y.end_date_max = model.War4.ONGOING_DATE y.ongoing = True else: end_date = utils.daterng(_int(row['end_year%d' % n]), _int(row['end_month%d' % n]), _int(row['end_day%d' % n])) y.end_date_min, y.end_date_max = end_date y.ongoing = False session.add(y) cols = ("war_num", "war_name", "war_type") cnt = collections.Counter() cnt_bellig = collections.Counter() reader = csv2.DictReader(src) reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] for row in reader: war_num = row['war_num'] belligerent = belligerent_key(row['ccode'], row['state_name']) cnt[war_num] += 1 cnt_bellig[belligerent] += 1 if cnt[war_num] == 1: session.add(model.War4(intnl=True, **utils.subset(row, cols))) for side in (1, 2): session.add( model.War4Side(war_side=war_side_pkey(side=side, war_num=war_num), side=side, war_num=war_num)) session.flush() if cnt_bellig[belligerent] == 1: session.add( model.War4Belligerent(belligerent=belligerent, belligerent_name=row['state_name'], ccode=row['ccode'])) session.flush() session.add(partic(row)) for i in (1, 2): add_partic_dates(row, i) session.commit()
def load_ksg2cow(): """ Load data into ksg_to_cow see cowfilter.pl in http://privatewww.essex.ac.uk/~ksg/data/exptradegdpv4.1.zip Apart from disagreements in the dates in which countries were in the system, which can be handled by merging, the main differences are in the following countries: - Yemen post-1990 - KSG considers Yemen a continuation of North Yemen - COW replaces both 678, and 680 with ccode 679 on 1990-5-22 - Germany post-1991 - COW switches from ccode 260 to 255 on 1990-10-30 - KSG considers modern Germany a continuation of the German Federal Republic. Differing COW and KSG codes (COW, KSG) - Nauru : 970, 971 - Tonga : 955, 972 - Tuvalu : 947, 973 - Kiribati : 946, 970 """ session = model.SESSION() q = session.query(model.CowSysMembership, model.KsgSysMembership).\ filter(model.CowSysMembership.ccode == model.KsgSysMembership.ccode).\ filter(model.CowSysMembership.st_date <= model.KsgSysMembership.end_date).\ filter(model.CowSysMembership.end_date >= model.KsgSysMembership.start_date) for cow, ksg in q: start_date = max(cow.st_date, ksg.start_date) end_date = min(cow.end_date, ksg.end_date) session.add(model.KsgToCow(cow_ccode = cow.ccode, ksg_ccode = ksg.ccode, start_date = start_date, end_date = end_date)) ## Get parts not in the data # if cow.st_date < ksg.start_date: # session.add(model.KsgToCow(cow_ccode = cow.ccode, # ksg_ccode = None, # start_date = cow.st_date, # end_date = ksg.start_date - datetime.timedelta(days=1))) # elif cow.st_date > ksg.start_date: # session.add(model.KsgToCow(cow_ccode = None, # ksg_ccode = ksg.ccode, # start_date = ksg.start_date, # end_date = cow.st_date - datetime.timedelta(days=1))) # if cow.end_date < ksg.end_date and cow.end_date != model.CowSysMembership.ONGOING_DATE: # session.add(model.KsgToCow(cow_ccode = None, # ksg_ccode = ksg.ccode, # start_date = cow.end_date + datetime.timedelta(days=1), # end_date = min(ksg.end_date, # model.CowSysMembership.ONGOING_DATE))) # elif cow.end_date > ksg.end_date: # session.add(model.KsgToCow(cow_ccode = cow.ccode, # ksg_ccode = None, # start_date = ksg.end_date + datetime.timedelta(days=1), # end_date = cow.end_date)) session.flush() ## Update Germany Post 1990 # ger = session.query(model.KsgToCow).\ # filter(model.KsgToCow.ksg_ccode == 260).\ # filter(model.KsgToCow.start_date == # ger.cow_ccode = 255 #session.add(ger) session.add(model.KsgToCow(ksg_ccode = 260, cow_ccode = 255, start_date = datetime.date(1990, 10, 3), end_date = ONGOING)) # Update Yemen Post-1990 # yemen = session.query(model.KsgToCow).\ # filter(model.KsgToCow.ksg_ccode == 678).\ # filter(model.KsgToCow.start_date == datetime.date(1990, 5, 22)).one() # yemen.cow_ccode = 679 # session.add(yemen) session.add(model.KsgToCow(ksg_ccode = 678, cow_ccode = 679, start_date = datetime.date(1990, 5, 22), end_date = ONGOING)) session.flush() # Resolve Nauru, Tonga, Tuvalu, Kiribati for x in session.query(model.KsgToCow).\ filter(model.KsgToCow.ksg_ccode.in_([970, 971, 972, 973])).all(): session.delete(x) for x in session.query(model.KsgToCow).\ filter(model.KsgToCow.cow_ccode.in_([970, 955, 947, 946])).all(): session.delete(x) NEWDATA = [ # Newdata # Nauru 970, 971 # {'cow_ccode': None, # 'ksg_ccode': 971, # 'start_date' : datetime.date(1968, 12, 31), # 'end_date' : datetime.date(1999, 9, 13)}, {'cow_ccode': 970, 'ksg_ccode': 971, 'start_date' : datetime.date(1999, 9, 14), 'end_date' : ONGOING}, # Tonga : 955, 972 # {'cow_ccode': None, # 'ksg_ccode': 972, # 'start_date' : datetime.date(1970, 6, 4), # 'end_date' : datetime.date(1999, 9, 13)}, {'cow_ccode': 955, 'ksg_ccode': 972, 'start_date' : datetime.date(1999, 9, 14), 'end_date' : ONGOING}, # Tuvalu : 947, 973 # {'cow_ccode': None, # 'ksg_ccode': 973, # 'start_date' : datetime.date(1978, 10, 1), # 'end_date' : datetime.date(2000, 9, 4)}, {'cow_ccode': 947, 'ksg_ccode': 973, 'start_date' : datetime.date(2000, 9, 5), 'end_date' : ONGOING}, # Kiribati : 946, 970 # {'cow_ccode': None, # 'ksg_ccode': 970, # 'start_date' : datetime.date(1979, 7, 12), # 'end_date' : datetime.date(1999, 9, 13)}, {'cow_ccode': 946, 'ksg_ccode': 970, 'start_date' : datetime.date(1999, 9, 14), 'end_date' : ONGOING}] for x in NEWDATA: session.add(model.KsgToCow(**x)) ## Any KSG without any matches # for ksg in session.query(model.KsgSysMembership): # if ksg.start_date < ONGOING: # q = session.query(model.KsgToCow).\ # filter(ksg.ccode == model.KsgToCow.ksg_ccode).\ # filter(sa.or_(ksg.end_date < model.KsgToCow.start_date, # ksg.start_date > model.KsgToCow.end_date)) # if q.count() == 0: # session.add(model.KsgToCow(ksg_ccode = ksg.ccode, # start_date = ksg.start_date, # end_date = min(ksg.end_date, ONGOING))) # for cow in session.query(model.CowSysMembership): # if cow.st_date < ONGOING: # q = session.query(model.KsgToCow).\ # filter(cow.ccode == model.KsgToCow.cow_ccode).\ # filter(sa.or_(cow.end_date < model.KsgToCow.start_date, # cow.st_date > model.KsgToCow.end_date)) # if q.count() == 0: # session.add(model.KsgToCow(cow_ccode = cow.ccode, # start_date = cow.st_date, # end_date = cow.end_date)) session.commit()
def load_war4_intra(src): """ Add Intra-state war data to war4_* tables updates tables cow_war4, cow_belligerents, cow_war4_participation, cow_war4_partic_dates """ session = model.SESSION() def _int(x): try: y = int(re.sub(',', '', x)) return y if y > 0 else None except TypeError: return None def _side(x): return x if x != "-8" else None def add_belligerent(session, name, ccode): if name != "-8": belligerent = belligerent_key(ccode, name) q = session.query(model.War4Belligerent).\ filter(model.War4Belligerent.belligerent == belligerent) if q.count() == 0: obj = model.War4Belligerent(belligerent=belligerent, belligerent_name=name, ccode=_int(ccode)) session.add(obj) session.flush() def partic(row, belligerent, side): y = model.War4Partic() war_side = war_side_pkey(_int(row['war_num']), (side == 'b') + 1) y.war_partic = war_partic_pkey(war_side, belligerent) y.belligerent = belligerent y.war_side = war_side for k, v in WHERE_FOUGHT[_int(row['where_fought'])].iteritems(): setattr(y, k, v) ## outcomes given in Side A / Side B rather than winner/loser ## per participant outcome = _int(row['outcome']) if side == 2: if outcome == 2: outcome = 1 elif outcome == 1: outcome = 2 y.outcome = outcome y.bat_death = _int(row['side_%sdeaths' % side]) y.initiator = (row['initiator'] == y.belligerent) return y def add_partic_dates(row, belligerent, side, n): if row['start_year%d' % n] != '-8': y = model.War4ParticDate() war_side = war_side_pkey(_int(row['war_num']), (side == 'b') + 1) y.war_partic = war_partic_pkey(war_side, belligerent) y.belligerent = belligerent y.war_side = war_side y.partic_num = n start_date = utils.daterng(_int(row['start_year%d' % n]), _int(row['start_month%d' % n]), _int(row['start_day%d' % n])) y.start_date_min, y.start_date_max = start_date if row['end_year%d' % n] == "-7": y.end_date_min = y.end_date_max = model.War4.ONGOING_DATE y.ongoing = True else: end_date = utils.daterng(_int(row['end_year%d' % n]), _int(row['end_month%d' % n]), _int(row['end_day%d' % n])) y.end_date_min, y.end_date_max = end_date y.ongoing = False session.add(y) cnt = collections.Counter() reader = csv2.DictReader(src, encoding='latin1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] for row in reader: war_num = row['war_num'] cnt[war_num] += 1 ## Add war if cnt[war_num] == 1: session.add( model.War4(war_num=war_num, war_name=row['war_name'], war_type=int(row['war_type']), intnl=row['intnl'] == '1')) for side in (1, 2): session.add( model.War4Side(war_side=war_side_pkey(war_num, side), side=side, war_num=row['war_num'])) session.flush() if _side(row['side_a']): add_belligerent(session, row['side_a'], row['ccode_a']) belligerent = belligerent_key(row['ccode_a'], row['side_a']) session.add(partic(row, belligerent, 'a')) for i in (1, 2): add_partic_dates(row, belligerent, 'a', i) if _side(row['side_b']): add_belligerent(session, row['side_b'], row['ccode_b']) belligerent = belligerent_key(row['ccode_b'], row['side_b']) session.add(partic(row, belligerent, 'b')) for i in (1, 2): add_partic_dates(row, belligerent, 'b', i) session.flush() session.commit()
def load_war4_nonstate(src): def _int(x): try: y = int(re.sub(',', '', x)) return y if y > 0 else None except TypeError: return None def _side(x): return x if x != "-8" else None def add_belligerent(session, name): ccode = None if name != "-8": belligerent = belligerent_key(ccode, name) q = session.query(model.War4Belligerent).\ filter(model.War4Belligerent.belligerent == belligerent) if q.count() == 0: obj = model.War4Belligerent(belligerent=belligerent, belligerent_name=name, ccode=ccode) session.add(obj) session.flush() def partic(row, side, name): y = model.War4Partic() war_side = war_side_pkey(_int(row['war_num']), (side == "b") + 1) belligerent = belligerent_key(None, name) y.war_partic = war_partic_pkey(war_side, belligerent) y.war_side = war_side y.belligerent = belligerent for k, v in WHERE_FOUGHT[_int(row['where_fought'])].iteritems(): setattr(y, k, v) outcome = _int(row['outcome']) if side: if outcome == 2: outcome = 1 elif outcome == 1: outcome = 2 y.outcome = outcome y.initiator = (row['initiator'] == side.upper()) return y def add_partic_dates(row, name, side): y = model.War4ParticDate() war_side = war_side_pkey(_int(row['war_num']), (side == "b") + 1) belligerent = belligerent_key(None, name) y.war_partic = war_partic_pkey(war_side, belligerent) y.partic_num = 1 start_date = utils.daterng(_int(row['start_year']), _int(row['start_month']), _int(row['start_day'])) y.start_date_min, y.start_date_max = start_date if row['end_year'] == "-7": y.end_date_min = y.end_date_max = model.War4.ONGOING_DATE y.ongoing = True else: end_date = utils.daterng(_int(row['end_year']), _int(row['end_month']), _int(row['end_day'])) y.end_date_min, y.end_date_max = end_date y.ongoing = False session.add(y) session = model.SESSION() reader = csv2.DictReader(src, encoding='latin1') reader.fieldnames = [utils.camel2under(x) for x in reader.fieldnames] for row in reader: war_num = row['war_num'] ## Add war session.add( model.War4(war_num=war_num, war_name=row['war_name'], war_type=int(row['war_type']), bat_deaths=_int(row['total_combat_deaths']))) for side in ('a', 'b'): side_bool = (side == 'b') + 1 session.add( model.War4Side(war_side=war_side_pkey(war_num, side_bool), side=side_bool, war_num=war_num, bat_death=_int(row['side_%sdeaths' % side]))) session.flush() for i in (1, 2): name = row['side_a%d' % i] if name != '-8': add_belligerent(session, name) session.add(partic(row, 'a', name)) add_partic_dates(row, name, 'a') for i in range(1, 6): name = row['side_b%d' % i] if name != '-8': add_belligerent(session, name) session.add(partic(row, 'b', name)) add_partic_dates(row, name, 'b') session.flush() session.commit()