Ejemplo n.º 1
0
def parse(filename):
  """Load rating lines from ``filename`` into the ``ratings`` table.

  Every line is stripped, decoded with ``ENCODING`` and matched against the
  layout ``<10 non-space chars> <votes> <rank> <title>``.  A matching line is
  inserted into ``ratings`` (database ``imdb``) and committed immediately;
  a line that does not match is printed so it can be inspected.

  The cursor and the connection are closed even when reading, parsing or
  inserting raises, so a failure no longer leaks the database connection.
  """
  from contextlib import closing

  # Compiled once up front: the pattern is the same for every line.
  matcher = re.compile(r'^(\S{10})\s+(\d+)\s+(\d{1,2}\.\d)(.*)$', re.U)
  insert_sql = (
    "INSERT INTO ratings (distribution, votes, rank, raw_title, title, "
    "year, tv_info, optional_info, is_movie) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);")

  # The file is read as bytes because each line is decoded explicitly below.
  with closing(psycopg2.connect("dbname=imdb")) as conn, \
       closing(conn.cursor()) as cur, \
       open(filename, 'rb') as f:
    # Iterate lazily instead of materialising the whole file in memory.
    for line in f:
      line = line.strip().decode(ENCODING)
      match = matcher.search(line)

      if match is None:
        # Not a rating row: echo it and move on.
        print(line)
        continue

      distribution, votes, rank, raw_title = match.groups()
      raw_title = raw_title.strip()

      pt = util.parse_title(raw_title)
      row = (distribution, int(votes), float(rank), raw_title,
             pt['title'], pt['year'], pt['tv_info'],
             pt['optional_info'], pt['is_movie'])

      cur.execute(insert_sql, row)
      # Committed per row, exactly as before.
      conn.commit()
Ejemplo n.º 2
0
 def test_parse_title_4(self):
     # A leading "(500)" stays in the title; the trailing "(2009)" is the
     # year, and the entry is reported as a movie with no extra info.
     parsed = util.parse_title('(500) Days of Summer (2009)')
     expected = (
         ('tv_info', None),
         ('title', '(500) Days of Summer'),
         ('year', 2009),
         ('is_movie', True),
         ('optional_info', None),
     )
     for key, value in expected:
         self.assertEqual(parsed[key], value)
Ejemplo n.º 3
0
 def test_parse_title_1(self):
   # Plain "Title (year)" input: a movie with no TV marker and no
   # optional info.
   full_title = 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb'
   parsed = util.parse_title('Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)')
   expected = (
     ('tv_info', None),
     ('title', full_title),
     ('year', 1964),
     ('is_movie', True),
     ('optional_info', None),
   )
   for key, value in expected:
     self.assertEqual(parsed[key], value)
Ejemplo n.º 4
0
 def test_parse_title_4(self):
   # The parenthesised number at the start belongs to the title; only the
   # final "(2009)" is picked up as the year.
   result = util.parse_title('(500) Days of Summer (2009)')
   for field, want in (('tv_info', None),
                       ('title', '(500) Days of Summer'),
                       ('year', 2009),
                       ('is_movie', True),
                       ('optional_info', None)):
     self.assertEqual(result[field], want)
Ejemplo n.º 5
0
 def test_parse_title_3(self):
   # Quoted title with a "(TV)" marker and a double-braced suffix: the
   # quotes are dropped, the marker is kept verbatim, one layer of braces
   # is removed, and the entry is not a movie.
   result = util.parse_title('"Üb immer Treu nach Möglichkeit" (1966) (TV) {{SSUSSPEND}}')
   for field, want in (('tv_info', '(TV)'),
                       ('title', 'Üb immer Treu nach Möglichkeit'),
                       ('year', 1966),
                       ('is_movie', False),
                       ('optional_info', '{SSUSSPEND}')):
     self.assertEqual(result[field], want)
Ejemplo n.º 6
0
 def test_parse_title_2(self):
   # Quoted title followed by braced info: the quotes and the outer braces
   # are stripped, there is no TV marker, and the entry is not a movie.
   parsed = util.parse_title('"Üb immer Treu nach Möglichkeit" (1966) {Ja, wenn die Musik nicht wär (#1.6)}')
   expected = (
     ('tv_info', None),
     ('title', 'Üb immer Treu nach Möglichkeit'),
     ('year', 1966),
     ('is_movie', False),
     ('optional_info', 'Ja, wenn die Musik nicht wär (#1.6)'),
   )
   for key, value in expected:
     self.assertEqual(parsed[key], value)
Ejemplo n.º 7
0
def parse(filename):
    """Load rating lines from ``filename`` into the ``ratings`` table.

    Every line is stripped, decoded with ``ENCODING`` and matched against the
    layout ``<10 non-space chars> <votes> <rank> <title>``.  A matching line
    is inserted into ``ratings`` (database ``imdb``) and committed
    immediately; a line that does not match is printed so it can be
    inspected.

    The cursor and the connection are closed even when reading, parsing or
    inserting raises, so a failure no longer leaks the database connection.
    """
    from contextlib import closing

    # Compiled once up front: the pattern is the same for every line.
    matcher = re.compile(r'^(\S{10})\s+(\d+)\s+(\d{1,2}\.\d)(.*)$', re.U)
    insert_sql = (
        "INSERT INTO ratings (distribution, votes, rank, raw_title, title, "
        "year, tv_info, optional_info, is_movie) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);")

    # The file is read as bytes because each line is decoded explicitly
    # below.
    with closing(psycopg2.connect("dbname=imdb")) as conn, \
            closing(conn.cursor()) as cur, \
            open(filename, 'rb') as f:
        # Iterate lazily instead of materialising the whole file in memory.
        for line in f:
            line = line.strip().decode(ENCODING)
            match = matcher.search(line)

            if match is None:
                # Not a rating row: echo it and move on.
                print(line)
                continue

            distribution, votes, rank, raw_title = match.groups()
            raw_title = raw_title.strip()

            pt = util.parse_title(raw_title)
            row = (distribution, int(votes), float(rank), raw_title,
                   pt['title'], pt['year'], pt['tv_info'],
                   pt['optional_info'], pt['is_movie'])

            cur.execute(insert_sql, row)
            # Committed per row, exactly as before.
            conn.commit()
Ejemplo n.º 8
0
def parse_movie(line):
    """Turn one tab-separated movie line into a flat tuple of fields.

    Empty columns (produced by runs of consecutive tabs) are discarded.  The
    first remaining column is handed to ``util.parse_title`` and the second
    to ``parse_range``.

    Returns:
        ``(fields[0], fields[1], title, year, tv_info, optional_info,
        is_movie)`` followed by the items of the tuple returned by
        ``parse_range``.

    Raises:
        IndexError: if the line has fewer than two non-empty columns.
    """
    # Built as a real list so that indexing also works on Python 3, where
    # ``filter`` returns a lazy iterator instead of a list.
    fields = [column for column in line.split('\t') if column]

    raw_title = fields[0]
    pt = util.parse_title(raw_title)
    raw_range = fields[1]
    parsed_range = parse_range(raw_range)

    parsed_title = (pt['title'], pt['year'], pt['tv_info'],
                    pt['optional_info'], pt['is_movie'])
    return (raw_title, raw_range) + parsed_title + parsed_range
Ejemplo n.º 9
0
 def test_parse_title_3(self):
     # Quoted title with a "(TV)" marker and a double-braced suffix: the
     # quotes are dropped, the marker is kept verbatim, one layer of braces
     # is removed, and the entry is not a movie.
     raw = '"Üb immer Treu nach Möglichkeit" (1966) (TV) {{SSUSSPEND}}'
     parsed = util.parse_title(raw)
     expected = (
         ('tv_info', '(TV)'),
         ('title', 'Üb immer Treu nach Möglichkeit'),
         ('year', 1966),
         ('is_movie', False),
         ('optional_info', '{SSUSSPEND}'),
     )
     for key, value in expected:
         self.assertEqual(parsed[key], value)
Ejemplo n.º 10
0
 def test_parse_title_2(self):
     # Quoted title followed by braced info: the quotes and the outer
     # braces are stripped, there is no TV marker, and the entry is not a
     # movie.
     raw = '"Üb immer Treu nach Möglichkeit" (1966) {Ja, wenn die Musik nicht wär (#1.6)}'
     result = util.parse_title(raw)
     for field, want in (('tv_info', None),
                         ('title', 'Üb immer Treu nach Möglichkeit'),
                         ('year', 1966),
                         ('is_movie', False),
                         ('optional_info',
                          'Ja, wenn die Musik nicht wär (#1.6)')):
         self.assertEqual(result[field], want)
Ejemplo n.º 11
0
 def test_parse_title_1(self):
     # Plain "Title (year)" input: a movie with no TV marker and no
     # optional info.
     title = 'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb'
     result = util.parse_title(
         'Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)'
     )
     for field, want in (('tv_info', None),
                         ('title', title),
                         ('year', 1964),
                         ('is_movie', True),
                         ('optional_info', None)):
         self.assertEqual(result[field], want)
Ejemplo n.º 12
0
def parse_movie(line):
    """Turn one tab-separated line into a flat tuple of title/address fields.

    Empty columns (produced by runs of consecutive tabs) are discarded.  The
    first remaining column is handed to ``util.parse_title`` and the second
    to ``parse_address``; a third column is optional.

    Returns:
        ``(fields[0], fields[1], third_or_None, title, year, tv_info,
        optional_info, is_movie)`` followed by the items of the tuple
        returned by ``parse_address``.  The third slot is ``None`` when the
        line has exactly two non-empty columns.

    Raises:
        IndexError: if the line has fewer than two non-empty columns.
    """
    # Built as a real list so that indexing also works on Python 3, where
    # ``filter`` returns a lazy iterator instead of a list.
    fields = [column for column in line.split("\t") if column]

    pt = util.parse_title(fields[0])
    parsed_name = (pt["title"], pt["year"], pt["tv_info"],
                   pt["optional_info"], pt["is_movie"])

    full_address = fields[1]
    parsed_address = parse_address(full_address)

    # Diagnostic only: surface addresses that did not split into 4 parts.
    if len(parsed_address) != 4:
        print(parsed_address)

    # Same rule as before: exactly two columns means "no third column".
    extra = None if len(fields) == 2 else fields[2]
    return (fields[0], full_address, extra) + parsed_name + parsed_address
Ejemplo n.º 13
0
def parse_movie(line):
    """Turn one tab-separated line into a flat tuple of title/address fields.

    Empty columns (produced by runs of consecutive tabs) are discarded.  The
    first remaining column is handed to ``util.parse_title`` and the second
    to ``parse_address``; a third column is optional.

    Returns:
        ``(fields[0], fields[1], third_or_None, title, year, tv_info,
        optional_info, is_movie)`` followed by the items of the tuple
        returned by ``parse_address``.  The third slot is ``None`` when the
        line has exactly two non-empty columns.

    Raises:
        IndexError: if the line has fewer than two non-empty columns.
    """
    # Built as a real list so that indexing also works on Python 3, where
    # ``filter`` returns a lazy iterator instead of a list.
    fields = [column for column in line.split("\t") if column]

    pt = util.parse_title(fields[0])
    parsed_name = (pt['title'], pt['year'], pt['tv_info'],
                   pt['optional_info'], pt['is_movie'])

    full_address = fields[1]
    parsed_address = parse_address(full_address)

    # Diagnostic only: surface addresses that did not split into 4 parts.
    if len(parsed_address) != 4:
        print(parsed_address)

    # Same rule as before: exactly two columns means "no third column".
    extra = None if len(fields) == 2 else fields[2]
    return (fields[0], full_address, extra) + parsed_name + parsed_address
Ejemplo n.º 14
0
def parse_movie(line):
  """Turn one tab-separated movie line into a flat tuple of fields.

  Empty columns (produced by runs of consecutive tabs) are discarded.  The
  first remaining column is handed to ``util.parse_title`` and the second
  to ``parse_range``.

  Returns:
    ``(fields[0], fields[1], title, year, tv_info, optional_info,
    is_movie)`` followed by the items of the tuple returned by
    ``parse_range``.

  Raises:
    IndexError: if the line has fewer than two non-empty columns.
  """
  # Built as a real list so that indexing also works on Python 3, where
  # ``filter`` returns a lazy iterator instead of a list.
  fields = [column for column in line.split('\t') if column]

  raw_title = fields[0]
  pt = util.parse_title(raw_title)
  raw_range = fields[1]
  parsed_range = parse_range(raw_range)

  parsed_title = (pt['title'], pt['year'], pt['tv_info'],
                  pt['optional_info'], pt['is_movie'])
  return (raw_title, raw_range) + parsed_title + parsed_range