def do_upgrade():
    """Move the per-attribute ``rt_*`` rows into one blob row per person.

    The steps are executed in this order:

    1. Add a ``datablob`` column to ``aidPERSONIDDATA`` unless
       ``SHOW COLUMNS`` already reports one.
    2. Redefine the ``data`` column as ``VARCHAR( 256 ) NULL DEFAULT NULL``.
    3. For every person that owns ``rt_*`` rows, group these rows by their
       ``opt1`` value (stored as the ticket's ``'tid'``), rebuild each group
       as a dictionary and insert a single ``request_tickets`` row holding
       the serialized list of dictionaries in ``datablob`` and the number of
       kept tickets in ``opt1``.
    4. Delete every ``rt_*`` row.

    While rebuilding a ticket the ``rt_`` prefix is stripped from the tag;
    ``confirm`` rows become ``('assign', value)`` operations, ``repeal`` rows
    become ``('reject', value)`` operations and any other tag is copied as a
    plain key of the ticket dictionary.  Tickets that end up without
    operations are not stored.
    """
    # Old tag (prefix already stripped) -> name of the new operation.
    operation_names = {'confirm': 'assign', 'repeal': 'reject'}
    ticket_id_of = itemgetter(2)

    if not run_sql("SHOW COLUMNS FROM `aidPERSONIDDATA` LIKE 'datablob'"):
        run_sql(
            """ALTER TABLE aidPERSONIDDATA ADD COLUMN datablob LONGBLOB NULL DEFAULT NULL AFTER data;"""
        )
    run_sql(
        """ALTER TABLE aidPERSONIDDATA MODIFY data VARCHAR( 256 ) NULL DEFAULT NULL"""
    )

    # The set removes the duplicates of persons owning several rt_* rows.
    owner_rows = set(run_sql(
        """select personid from aidPERSONIDDATA where tag like %s""",
        ('rt_%',)
    ))
    owners = [row[0] for row in owner_rows]

    for pid in owners:
        ticket_rows = run_sql(
            """select tag, data, opt1 from aidPERSONIDDATA where personid=%s and tag like 'rt_%%'""",
            (pid,)
        )

        # groupby only merges adjacent rows, hence the sort on the same key.
        ordered_rows = sorted(ticket_rows, key=ticket_id_of)

        tickets = []
        for tid, rows_of_ticket in groupby(ordered_rows, key=ticket_id_of):
            ticket = {'tid': tid}
            operations = []
            for row in rows_of_ticket:
                attribute = row[0][3:]  # drop the leading 'rt_'
                value = row[1]
                if attribute in operation_names:
                    operations.append((operation_names[attribute], value))
                else:
                    ticket[attribute] = value
            ticket['operations'] = operations

            # A ticket that requests nothing is dropped.
            if operations:
                tickets.append(ticket)

        run_sql(
            """insert into aidPERSONIDDATA (personid, tag, datablob, opt1) values (%s, %s, %s, %s)""",
            (pid, 'request_tickets', serialize(tickets), len(tickets))
        )

    run_sql("""delete from aidPERSONIDDATA where tag like %s""", ('rt_%',))
def _upgrade_build_request_ticket(tid, attributes):
    """Return the dictionary form of one ticket, or None without operations.

    ``attributes`` is a sequence of ``(tag, value)`` pairs whose tags already
    lost their ``rt_`` prefix.  ``confirm`` and ``repeal`` pairs are collected
    as ``('assign', value)`` and ``('reject', value)`` operations; every other
    pair becomes a key of the returned dictionary.
    """
    ticket = {'tid': tid}
    operations = []
    for tag, value in attributes:
        if tag == 'confirm':
            operations.append(('assign', value))
        elif tag == 'repeal':
            operations.append(('reject', value))
        else:
            ticket[tag] = value
    ticket['operations'] = operations
    if not operations:
        return None
    return ticket


def do_upgrade():
    """Convert the ``rt_*`` rows of ``aidPERSONIDDATA`` to the blob layout.

    The function first makes sure the ``datablob`` column exists (it is added
    only when ``SHOW COLUMNS`` returns nothing) and redefines ``data`` as
    ``VARCHAR( 256 ) NULL DEFAULT NULL``.  Then, for each person having
    ``rt_*`` rows, these rows are grouped by ``opt1`` (the group key is kept
    as the ticket's ``'tid'``), turned into dictionaries and written back as
    one ``request_tickets`` row: the serialized list goes to ``datablob`` and
    the number of tickets in the list goes to ``opt1``.  Tickets without any
    operation are left out.  Finally all ``rt_*`` rows are deleted.
    """
    has_datablob = run_sql(
        "SHOW COLUMNS FROM `aidPERSONIDDATA` LIKE 'datablob'")
    if not has_datablob:
        run_sql("""ALTER TABLE aidPERSONIDDATA ADD COLUMN datablob LONGBLOB NULL DEFAULT NULL AFTER data;""")

    run_sql("""ALTER TABLE aidPERSONIDDATA MODIFY data VARCHAR( 256 ) NULL DEFAULT NULL""")

    # Distinct result rows first, then the bare person ids.
    distinct_rows = set(run_sql("""select personid from aidPERSONIDDATA where tag like %s""", ('rt_%',)))
    person_ids = [row[0] for row in distinct_rows]

    by_ticket_id = itemgetter(2)
    for pid in person_ids:
        rows = run_sql("""select tag, data, opt1 from aidPERSONIDDATA where personid=%s and tag like 'rt_%%'""", (pid,))

        # Sorting on opt1 makes the rows of one ticket adjacent for groupby.
        rows = sorted(rows, key=by_ticket_id)

        converted = []
        for tid, group in groupby(rows, key=by_ticket_id):
            # row[0][3:] strips the 'rt_' prefix from the tag.
            attributes = [(row[0][3:], row[1]) for row in group]
            ticket = _upgrade_build_request_ticket(tid, attributes)
            if ticket is not None:
                converted.append(ticket)

        tickets_num = len(converted)
        blob = serialize(converted)
        run_sql("""insert into aidPERSONIDDATA (personid, tag, datablob, opt1) values (%s, %s, %s, %s)""", (pid, 'request_tickets', blob, tickets_num))

    run_sql("""delete from aidPERSONIDDATA where tag like %s""", ('rt_%',))
def _create_dense_index(name_pids_dict, names_list):
    """Fill the ``aidDENSEINDEX`` table from the given names.

    ``names_list`` fixes the order: the position of a name in it becomes the
    ``name_id`` of its row.  ``name_pids_dict`` maps every listed name to a
    ``(person_name, personids)`` pair; ``personids`` is turned into a list
    and serialized before being stored.

    The values are handed to ``populate_table`` as one flat list (three
    consecutive entries per row), after which ``set_dense_index_ready()`` is
    called.
    """
    flat_values = []
    for name_id, name in enumerate(names_list):
        person_name, personids = name_pids_dict[name]
        flat_values.extend(
            (name_id, person_name, serialize(list(personids)))
        )

    columns = ['name_id', 'person_name', 'personids']
    populate_table('aidDENSEINDEX', columns, flat_values)
    set_dense_index_ready()
def _create_dense_index(name_pids_dict, names_list):
    """Store one ``aidDENSEINDEX`` row per entry of ``names_list``.

    Each name is looked up in ``name_pids_dict``, which must yield a
    ``(person_name, personids)`` pair.  The row consists of the zero-based
    position of the name in ``names_list``, the person name and the
    serialized list of person ids.

    All row values are collected in a single flat list and passed to
    ``populate_table``; ``set_dense_index_ready()`` is invoked once the table
    has been populated.
    """
    values = list()
    position = 0
    while position < len(names_list):
        entry = name_pids_dict[names_list[position]]
        person_name, personids = entry
        values += [position, person_name, serialize(list(personids))]
        position += 1

    populate_table(
        'aidDENSEINDEX',
        ['name_id', 'person_name', 'personids'],
        values
    )
    set_dense_index_ready()
def _create_dense_index(indexable_names_to_authors_mapping, indexable_names):
    """Fill ``aidDENSEINDEX`` with one row per indexable name.

    ``indexable_names`` fixes the order: the position of a name in it is
    stored as the row ``id``.  ``indexable_names_to_authors_mapping`` maps
    each of these names to an ``(authors, indexable_surname)`` pair.

    A row is made of: the id, the indexable name itself, the serialized list
    of authors, the constant flag ``0`` and the indexable surname.  All the
    values are passed to ``populate_table`` as one flat list.
    """
    columns = [
        'id',
        'indexable_string',
        'personids',
        'flag',
        'indexable_surname',
    ]

    flat_values = []
    for string_id, name in enumerate(indexable_names):
        authors, indexable_surname = indexable_names_to_authors_mapping[name]
        row = (
            string_id,
            name,
            serialize(list(authors)),
            0,
            indexable_surname,
        )
        flat_values.extend(row)

    populate_table('aidDENSEINDEX', columns, flat_values)
def cache_name_variants_of_authors(author_to_name_and_occurrence_mapping):
    """Store, for each author, the occurrences of his indexable names.

    ``author_to_name_and_occurrence_mapping`` maps an author to a dictionary
    ``{name: occurrences}``.  Every name is converted to its indexable form
    (ASCII translation, ``indexable_name_re`` matches replaced by a space,
    split into name parts, ``create_indexable_name``).  Different names may
    share the same indexable form; in that case their occurrences are added.

    For each author the row ``(author, serialized dictionary, 1)`` is written
    to the ``id``, ``personids`` and ``flag`` columns of ``aidDENSEINDEX``.
    ``populate_table`` is called with ``empty_table_first=False``.
    """
    flat_values = []

    for author, name_counts in author_to_name_and_occurrence_mapping.iteritems():
        merged_counts = {}
        for raw_name, count in name_counts.iteritems():
            ascii_name = translate_to_ascii(raw_name)[0]
            cleaned_name = indexable_name_re.sub(' ', ascii_name)
            indexable_name = create_indexable_name(
                split_name_parts(cleaned_name))

            if indexable_name in merged_counts:
                merged_counts[indexable_name] += count
            else:
                merged_counts[indexable_name] = count

        flat_values.extend((author, serialize(merged_counts), 1))

    populate_table(
        'aidDENSEINDEX',
        ['id', 'personids', 'flag'],
        flat_values,
        empty_table_first=False
    )
def _create_inverted_lists(indexable_names):
    """Build the q-gram inverted lists and store them in ``aidINVERTEDLISTS``.

    Each name is identified by its zero-based position in
    ``indexable_names``.  For every distinct value returned by
    ``get_qgrams_from_string(name, QGRAM_LEN)`` the id of the name is added
    to the set kept for that q-gram, and the counter kept next to the set is
    increased by one.

    One row per q-gram (q-gram, serialized list of ids, counter) is passed
    to ``populate_table`` as part of a single flat list; afterwards
    ``set_inverted_lists_ready()`` is called.
    """
    # qgram -> [set of string ids, number of ids recorded]
    postings = {}

    for string_id, name in enumerate(indexable_names):
        # The set keeps a q-gram repeated inside a name from counting twice.
        for qgram in set(get_qgrams_from_string(name, QGRAM_LEN)):
            if qgram in postings:
                entry = postings[qgram]
                entry[0].add(string_id)
                entry[1] += 1
            else:
                postings[qgram] = [set([string_id]), 1]

    flat_values = []
    for qgram in postings:
        ids, cardinality = postings[qgram]
        flat_values.extend((qgram, serialize(list(ids)), cardinality))

    populate_table(
        'aidINVERTEDLISTS',
        ['qgram', 'inverted_list', 'list_cardinality'],
        flat_values
    )
    set_inverted_lists_ready()
def create_inverted_lists_worker(names_list):
    """Create the inverted lists of ``names_list`` in ``aidINVERTEDLISTS``.

    The id of a name is its zero-based position in ``names_list``.  The
    distinct results of ``get_qgrams_from_string(name, QGRAM_LEN)`` are
    computed for every name; each q-gram is associated with the set of ids
    of the names it was found in, together with a running count of them.

    For each q-gram the values (q-gram, serialized list of ids, count) are
    appended to one flat list that is given to ``populate_table``.  The
    function ends by calling ``set_inverted_lists_ready()``.
    """
    # qgram -> [set of name ids, cardinality]
    inverted_lists = {}

    for name_id, name in enumerate(names_list):
        distinct_qgrams = set(get_qgrams_from_string(name, QGRAM_LEN))
        for qgram in distinct_qgrams:
            posting = inverted_lists.get(qgram)
            if posting is None:
                inverted_lists[qgram] = [set([name_id]), 1]
                continue
            posting[0].add(name_id)
            posting[1] = posting[1] + 1

    flat_values = []
    for qgram, (name_ids, cardinality) in inverted_lists.items():
        flat_values += [qgram, serialize(list(name_ids)), cardinality]

    columns = ['qgram', 'inverted_list', 'list_cardinality']
    populate_table('aidINVERTEDLISTS', columns, flat_values)
    set_inverted_lists_ready()
def create_inverted_lists_worker(names_list):
    """Populate ``aidINVERTEDLISTS`` with the q-gram postings of the names.

    Names are numbered by their zero-based position in ``names_list``.  For
    each name the distinct q-grams given by
    ``get_qgrams_from_string(name, QGRAM_LEN)`` are visited: the number of
    the name is added to the set of that q-gram and the stored cardinality
    grows by one (a q-gram seen for the first time starts with a one-element
    set and cardinality 1).

    Every q-gram contributes the triple (q-gram, serialized list of name
    numbers, cardinality) to the flat list passed to ``populate_table``;
    ``set_inverted_lists_ready()`` is called last.
    """
    inverted_lists = dict()

    for name_id, name in enumerate(names_list):
        for qgram in set(get_qgrams_from_string(name, QGRAM_LEN)):
            try:
                known = inverted_lists[qgram]
            except KeyError:
                # First occurrence of this q-gram.
                inverted_lists[qgram] = [set([name_id]), 1]
            else:
                known[0].add(name_id)
                known[1] += 1

    args = []
    for qgram in inverted_lists:
        members, cardinality = inverted_lists[qgram]
        serialized_members = serialize(list(members))
        args.extend([qgram, serialized_members, cardinality])

    populate_table(
        'aidINVERTEDLISTS',
        ['qgram', 'inverted_list', 'list_cardinality'],
        args
    )
    set_inverted_lists_ready()