def parse(filename):
    """Load a ratings list file into the ``ratings`` table of the ``imdb`` database.

    Each line of ``filename`` is stripped, decoded with ``ENCODING`` and matched
    against the ratings-line pattern: a 10-character distribution string, an
    integer vote count, a rank with one decimal digit, and the remaining text
    as the raw title. The raw title is further split by ``util.parse_title``.

    Matching lines are inserted and committed one row at a time, so rows
    written before a failure stay in the database. Lines that do not match
    are printed to stdout and skipped.

    The cursor and connection are closed even if reading or inserting raises;
    the exception itself still propagates to the caller.
    """
    # Compiled once, outside the loop. The raw string keeps \S, \s and \d as
    # regex escapes instead of relying on Python leaving unknown string
    # escapes untouched.
    matcher = re.compile(r'^(\S{10})\s+(\d+)\s+(\d{1,2}\.\d)(.*)$', re.U)
    insert_sql = (
        "INSERT INTO ratings (distribution, votes, rank, raw_title, title, "
        "year, tv_info, optional_info, is_movie) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);"
    )

    conn = psycopg2.connect("dbname=imdb")
    try:
        cur = conn.cursor()
        try:
            with open(filename) as f:
                # Iterate the file lazily instead of materialising every line.
                for line in f:
                    line = line.strip().decode(ENCODING)
                    match = matcher.search(line)
                    if match is None:
                        # Header/footer or malformed line: echo it and move on.
                        print(line)
                        continue

                    distribution = match.group(1)
                    votes = int(match.group(2))
                    rank = float(match.group(3))
                    raw_title = match.group(4).strip()

                    pt = util.parse_title(raw_title)
                    parsed_title = (pt['title'], pt['year'], pt['tv_info'],
                                    pt['optional_info'], pt['is_movie'])

                    cur.execute(
                        insert_sql,
                        (distribution, votes, rank, raw_title) + parsed_title)
                    # Per-row commit is kept on purpose: it preserves the
                    # original durability behaviour on partial failure.
                    conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def test_parse_title_4(self):
    # A movie title that itself begins with a parenthesised number: the
    # leading "(500)" must stay in the title and only the trailing "(2009)"
    # is taken as the year.
    raw_title = '(500) Days of Summer (2009)'
    parsed = util.parse_title(raw_title)

    self.assertEqual(parsed['tv_info'], None)
    self.assertEqual(parsed['title'], '(500) Days of Summer')
    self.assertEqual(parsed['year'], 2009)
    self.assertEqual(parsed['is_movie'], True)
    self.assertEqual(parsed['optional_info'], None)
def test_parse_title_1(self):
    # Plain movie entry: unquoted title followed by a year, with no TV
    # marker and no optional info.
    expected_title = 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb'
    parsed = util.parse_title(
        'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)')

    self.assertEqual(parsed['tv_info'], None)
    self.assertEqual(parsed['title'], expected_title)
    self.assertEqual(parsed['year'], 1964)
    self.assertEqual(parsed['is_movie'], True)
    self.assertEqual(parsed['optional_info'], None)
def test_parse_title_3(self):
    # Quoted title with a "(TV)" marker and a double-braced trailer: the
    # quotes are dropped from the title, the marker is reported as tv_info,
    # and one layer of braces is removed from the optional info.
    raw_title = '"Üb immer Treu nach Möglichkeit" (1966) (TV) {{SSUSSPEND}}'
    parsed = util.parse_title(raw_title)

    self.assertEqual(parsed['tv_info'], '(TV)')
    self.assertEqual(parsed['title'], 'Üb immer Treu nach Möglichkeit')
    self.assertEqual(parsed['year'], 1966)
    self.assertEqual(parsed['is_movie'], False)
    self.assertEqual(parsed['optional_info'], '{SSUSSPEND}')
def test_parse_title_2(self):
    # Quoted title followed by a year and a single-braced trailer: the
    # braces are stripped from the optional info and the entry is not
    # classified as a movie.
    raw_title = '"Üb immer Treu nach Möglichkeit" (1966) {Ja, wenn die Musik nicht wär (#1.6)}'
    parsed = util.parse_title(raw_title)

    self.assertEqual(parsed['tv_info'], None)
    self.assertEqual(parsed['title'], 'Üb immer Treu nach Möglichkeit')
    self.assertEqual(parsed['year'], 1966)
    self.assertEqual(parsed['is_movie'], False)
    self.assertEqual(parsed['optional_info'], 'Ja, wenn die Musik nicht wär (#1.6)')
def parse(filename):
    """Load a ratings list file into the ``ratings`` table of the ``imdb`` database.

    Each line of ``filename`` is stripped, decoded with ``ENCODING`` and matched
    against the ratings-line pattern: a 10-character distribution string, an
    integer vote count, a rank with one decimal digit, and the remaining text
    as the raw title. The raw title is further split by ``util.parse_title``.

    Matching lines are inserted and committed one row at a time, so rows
    written before a failure stay in the database. Lines that do not match
    are printed to stdout and skipped.

    The cursor and connection are closed even if reading or inserting raises;
    the exception itself still propagates to the caller.
    """
    # Compiled once, outside the loop. The raw string keeps \S, \s and \d as
    # regex escapes instead of relying on Python leaving unknown string
    # escapes untouched.
    matcher = re.compile(r'^(\S{10})\s+(\d+)\s+(\d{1,2}\.\d)(.*)$', re.U)
    insert_sql = (
        "INSERT INTO ratings (distribution, votes, rank, raw_title, title, "
        "year, tv_info, optional_info, is_movie) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);"
    )

    conn = psycopg2.connect("dbname=imdb")
    try:
        cur = conn.cursor()
        try:
            with open(filename) as f:
                # Iterate the file lazily instead of materialising every line.
                for line in f:
                    line = line.strip().decode(ENCODING)
                    match = matcher.search(line)
                    if match is None:
                        # Header/footer or malformed line: echo it and move on.
                        print(line)
                        continue

                    distribution = match.group(1)
                    votes = int(match.group(2))
                    rank = float(match.group(3))
                    raw_title = match.group(4).strip()

                    pt = util.parse_title(raw_title)
                    parsed_title = (pt['title'], pt['year'], pt['tv_info'],
                                    pt['optional_info'], pt['is_movie'])

                    cur.execute(
                        insert_sql,
                        (distribution, votes, rank, raw_title) + parsed_title)
                    # Per-row commit is kept on purpose: it preserves the
                    # original durability behaviour on partial failure.
                    conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
def parse_movie(line):
    """Turn one tab-separated list line into a flat tuple.

    Empty pieces produced by consecutive tabs are discarded. The first
    remaining piece is handed to ``util.parse_title`` and the second to
    ``parse_range``.

    Returns the two raw pieces, followed by the parsed title parts
    (title, year, tv_info, optional_info, is_movie), followed by whatever
    tuple ``parse_range`` returned.
    """
    columns = [piece for piece in line.split('\t') if piece]

    title_info = util.parse_title(columns[0])
    range_part = parse_range(columns[1])

    raw_part = (columns[0], columns[1])
    title_part = (
        title_info['title'],
        title_info['year'],
        title_info['tv_info'],
        title_info['optional_info'],
        title_info['is_movie'],
    )
    return raw_part + title_part + range_part
def test_parse_title_3(self):
    # Quoted title, year, "(TV)" marker and a double-braced trailer. Expect
    # the quotes removed, the marker surfaced as tv_info, and a single pair
    # of braces left around the optional info.
    source = '"Üb immer Treu nach Möglichkeit" (1966) (TV) {{SSUSSPEND}}'
    result = util.parse_title(source)

    self.assertEqual(result['tv_info'], '(TV)')
    self.assertEqual(result['title'], 'Üb immer Treu nach Möglichkeit')
    self.assertEqual(result['year'], 1966)
    self.assertEqual(result['is_movie'], False)
    self.assertEqual(result['optional_info'], '{SSUSSPEND}')
def test_parse_title_2(self):
    # Quoted title with a year and a braced trailer. Expect the quotes and
    # braces removed, no tv_info, and is_movie False.
    source = '"Üb immer Treu nach Möglichkeit" (1966) {Ja, wenn die Musik nicht wär (#1.6)}'
    result = util.parse_title(source)

    self.assertEqual(result['tv_info'], None)
    self.assertEqual(result['title'], 'Üb immer Treu nach Möglichkeit')
    self.assertEqual(result['year'], 1966)
    self.assertEqual(result['is_movie'], False)
    self.assertEqual(result['optional_info'], 'Ja, wenn die Musik nicht wär (#1.6)')
def test_parse_title_1(self):
    # Unquoted title followed only by a year: expected to be a movie with
    # neither tv_info nor optional_info set.
    source = 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)'
    wanted_title = 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb'
    result = util.parse_title(source)

    self.assertEqual(result['tv_info'], None)
    self.assertEqual(result['title'], wanted_title)
    self.assertEqual(result['year'], 1964)
    self.assertEqual(result['is_movie'], True)
    self.assertEqual(result['optional_info'], None)
def parse_movie(line):
    """Turn one tab-separated list line into a flat tuple.

    Empty pieces produced by consecutive tabs are discarded. The first
    remaining piece is parsed with ``util.parse_title`` and the second with
    ``parse_address``. If the parsed address does not have exactly four
    elements it is printed (the row is still returned).

    Returns ``(raw_title, raw_address, third_field)`` followed by the parsed
    title parts (title, year, tv_info, optional_info, is_movie) and the
    parsed address. ``third_field`` is ``None`` when the line has exactly
    two non-empty pieces, otherwise the third piece.
    """
    columns = [piece for piece in line.split("\t") if piece]

    title_info = util.parse_title(columns[0])
    name_part = (
        title_info["title"],
        title_info["year"],
        title_info["tv_info"],
        title_info["optional_info"],
        title_info["is_movie"],
    )

    address_part = parse_address(columns[1])
    if len(address_part) != 4:
        # Surface unexpectedly shaped addresses without dropping the row.
        print(address_part)

    third_field = None if len(columns) == 2 else columns[2]
    return (columns[0], columns[1], third_field) + name_part + address_part
def parse_movie(line):
    """Turn one tab-separated list line into a flat tuple.

    Empty pieces produced by consecutive tabs are discarded. The first
    remaining piece is parsed with ``util.parse_title`` and the second with
    ``parse_address``. If the parsed address does not have exactly four
    elements it is printed (the row is still returned).

    Returns ``(raw_title, raw_address, third_field)`` followed by the parsed
    title parts (title, year, tv_info, optional_info, is_movie) and the
    parsed address. ``third_field`` is ``None`` when the line has exactly
    two non-empty pieces, otherwise the third piece.
    """
    pieces = [chunk for chunk in line.split("\t") if chunk]

    parsed = util.parse_title(pieces[0])
    title_tuple = (parsed['title'], parsed['year'], parsed['tv_info'],
                   parsed['optional_info'], parsed['is_movie'])

    address_tuple = parse_address(pieces[1])
    if len(address_tuple) != 4:
        # Surface unexpectedly shaped addresses without dropping the row.
        print(address_tuple)

    if len(pieces) == 2:
        extra = None
    else:
        extra = pieces[2]
    return (pieces[0], pieces[1], extra) + title_tuple + address_tuple