class Dumper: RECORD = 0 LOCATION = 1 def __init__(self, json_filename): with open(json_filename, 'rb') as file: self.tweets = json.load(file) self.target_fields = ["id", "bounding_box"] self.conn = Connection()() def __iter__(self, target): if target == Dumper.LOCATION: for tweet in self.tweets: (a, b), (c, d) = tweet['place']["bounding_box"] yield tuple([tweet['id'], a, b, c, d]) elif target == Dumper.RECORD: for tweet in self.tweets: yield tuple([tweet[field] for field in ["id", "create_at", "text"]]) def dump_all(self, table, value_count): try: sql = f'INSERT INTO {table} VALUES({"%s " * value_count});' for record in self: print(record) cur = self.conn.cursor() cur.execute(sql, record) cur.close() self.conn.commit() except psycopg2.DatabaseError as error: print(error) def get_location(self): pass
class Labeler: labels = {0: "TRUE", 1: "FALSE", 2: "NOT_SURE"} def __init__(self, role): self.role = role self.conn = Connection()() self.unlabeled = None def mark(self, tweet_id, value) -> None: try: cur = self.conn.cursor() sql = f'UPDATE records SET label{self.role} = {value} WHERE id = {tweet_id};' cur.execute(sql, (value, tweet_id)) cur.close() self.conn.commit() except (Exception, psycopg2.DatabaseError) as error: print(error) def get_next_unlabeled(self): cur = self.conn.cursor() sql = f'SELECT id, text FROM records WHERE label{self.role} IS NULL order by random() LIMIT 1;' cur.execute(sql) row = cur.fetchone() while row: yield row row = cur.fetchone() cur.close() self.conn.commit() def start(self): prev_id = None prev_text = None prev_label = None next_batch = self.get_next_unlabeled() while next_batch: for id, text in next_batch: char = self.get_next_char(text) while char == 'r': if prev_text: self.mark(prev_id, not prev_label) prev_label = (prev_label + 1) % 3 print( f"[{prev_text} is changed to {self.labels[prev_label]}]" ) self.mark(prev_id, prev_label) char = self.get_next_char(text) label = int(char) - 1 print(self.labels[label]) self.mark(id, label) prev_label = label prev_id = id prev_text = text next_batch = self.get_next_unlabeled() @staticmethod def get_next_char(text): char = None while not char or char not in list('123r'): print( f'================================================\n\n\n\n{text}\n\n\n\n\n\n\n\n\n\n([1] for True, [2] for False, [3] for not sure, [r] for reverse previous (rotate in three values), enter for skip to next) ->' ) char = input().strip() return char