Ejemplo n.º 1
0
 def Part3(self):
     """Runs the SongPlayCounter job and checks the head of its HDFS output.

     The job is launched through the `express` script or through
     `express.py`, depending on self._python. Afterwards the first three
     lines of the job's part-00000 file are read back and each one is
     expected to look like `song-<digits><TAB><digits>`.
     """
     # Equality (not truthiness) is kept on purpose: only a value equal to
     # False selects the `express` script; anything else, None included,
     # selects express.py.
     if self._python == False:
         job_template = """
     |express job \\
     |  ${EXPRESS_MUSIC_JAR} \\
     |  org.kiji.express.music.SongPlayCounter \\
     |  --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
     |  --table-uri ${KIJI}/users \\
     |  --output ${HDFS_BASE}/express-tutorial/songcount-output \\
     |  --hdfs
     """
     else:
         job_template = """
     |express.py \\
     |    job \\
     |    -libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
     |    -user_jar=${EXPRESS_MUSIC_JAR} \\
     |    -job_name=org.kiji.express.music.SongPlayCounter \\
     |    -mode=hdfs \\
     |    --table-uri ${KIJI}/users \\
     |    --output ${HDFS_BASE}/express-tutorial/songcount-output \\
     """
     counter_job = self.Command(base.StripMargin(job_template))
     assert counter_job.exit_code == 0

     # Read back the first three lines the job wrote.
     head_of_output = self.Command("""
     hadoop fs -text ${HDFS_BASE}/express-tutorial/songcount-output/part-00000 | head -3
     """)
     tutorial_test.Expect(expect=0, actual=head_of_output.exit_code)

     # Empty lines are dropped before counting and matching.
     non_empty = [
         text
         for text in self.StripJavaHomeLine(head_of_output.output_lines)
         if text
     ]
     tutorial_test.Expect(expect=3, actual=len(non_empty))
     for text in non_empty:
         tutorial_test.ExpectRegexMatch(
             expect=r'^song-\d+\t\d+$',
             actual=text,
         )
Ejemplo n.º 2
0
    def Part1(self):
        """Runs the setup part of the KijiExpress Music tutorial.

        Every command below must exit with status 0. In order: install the
        Kiji instance, feed music-schema.ddl to kiji-schema-shell, confirm
        that `kiji ls` mentions both "songs" and "users", create the
        express-tutorial directory in HDFS, and copy the example JSON files
        into it.

        Tutorial page:
        http://docs.kiji.org/tutorials/express-recommendation/DEVEL/express-setup/
        """
        # Install the Kiji instance.
        installed = self.Command('kiji install --kiji=${KIJI}')
        assert installed.exit_code == 0
        assert 'Successfully created kiji instance: ' in installed.output_text

        # Run the tutorial's DDL file through the schema shell.
        ddl_command = base.StripMargin("""
        |kiji-schema-shell \\
        |    --kiji=${KIJI} \\
        |    --file=${MUSIC_EXPRESS_HOME}/music-schema.ddl \\
        """)
        schema_shell = self.Command(ddl_command)
        print(schema_shell.error_text)
        assert schema_shell.exit_code == 0

        # Both tutorial tables must show up in the instance listing.
        listing = self.Command('kiji ls ${KIJI}')
        assert listing.exit_code == 0
        assert 'songs' in listing.output_text, (
            'Missing table "songs": %s' % listing.output_lines)
        assert 'users' in listing.output_text, (
            'Missing table "users": %s' % listing.output_lines)

        # Stage the example data in HDFS.
        made_dir = self.Command(
            'hadoop fs -mkdir ${HDFS_BASE}/express-tutorial/')
        assert made_dir.exit_code == 0

        upload_command = base.StripMargin("""
        |hadoop fs -copyFromLocal \\
        |    ${MUSIC_EXPRESS_HOME}/example_data/*.json \\
        |    ${HDFS_BASE}/express-tutorial/
        """)
        uploaded = self.Command(upload_command)
        assert uploaded.exit_code == 0
Ejemplo n.º 3
0
 def Part4(self):
     """Runs the TopNextSongs job and checks two scanned rows of `songs`.

     The job is launched through the `express` script or through
     `express.py`, depending on self._python. The table is then scanned
     with --max-rows=2 and the output is expected to be one header line
     followed, for each row, by five lines: the info:metadata cell header,
     its JSON value, the info:top_next_songs cell header, its JSON value,
     and an empty line.

     Raises:
         AssertionError: if a command exits non-zero or the scan prints
             fewer lines than the layout above requires.
     """
     # Equality (not truthiness) is kept on purpose: only a value equal to
     # False selects the `express` script; anything else, None included,
     # selects express.py.
     if self._python == False:
         cmd = base.StripMargin("""
     |express job \\
     |    ${EXPRESS_MUSIC_JAR} \\
     |    org.kiji.express.music.TopNextSongs \\
     |    --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
     |    --users-table ${KIJI}/users \\
     |    --songs-table ${KIJI}/songs --hdfs
     """)
     else:
         # NOTE(review): this variant passes both -mode=hdfs and a trailing
         # --hdfs; confirm whether the second flag is still needed.
         cmd = base.StripMargin("""
     |express.py \\
     |    job \\
     |    -libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
     |    -user_jar=${EXPRESS_MUSIC_JAR} \\
     |    -job_name=org.kiji.express.music.TopNextSongs \\
     |    -mode=hdfs \\
     |    --users-table ${KIJI}/users \\
     |    --songs-table ${KIJI}/songs --hdfs
     """)
     top_songs = self.Command(cmd)
     assert top_songs.exit_code == 0

     list_rows = self.Command('kiji scan ${KIJI}/songs --max-rows=2')
     assert list_rows.exit_code == 0
     stripped_output = self.StripJavaHomeLine(list_rows.output_lines)

     # One pattern per output line of a scanned row, in order.
     row_patterns = (
         r"^entity-id=\['song-\d+'\] \[\d+\] info:metadata$",
         r"^\s*{\s*\"song_name\".*\"album_name\".*\"artist_name\".*\"genre\".*\"tempo\".*\"duration\".*\s*}\s*$",
         r"^entity-id=\['song-\d+'\] \[\d+\] info:top_next_songs$",
         r"^\s*{\s*\"top_songs\".*}$",
         r"^$",
     )
     rows = 2
     lines_per_row = len(row_patterns)

     # Check the length before indexing, so that short or empty output fails
     # with the line count instead of an IndexError.
     assert len(stripped_output) >= lines_per_row * rows + 1, (
         len(stripped_output))
     assert stripped_output[0].startswith('Scanning kiji table: kiji://')

     for row in range(rows):
         # Offsets start at 1 because line 0 is the scan header.
         for offset, pattern in enumerate(row_patterns, start=1):
             tutorial_test.ExpectRegexMatch(
                 expect=pattern,
                 actual=stripped_output[offset + row * lines_per_row])
Ejemplo n.º 4
0
    def Part5(self):
        """Runs the SongRecommender job and checks two scanned rows of `users`.

        The job is launched through the `express` script or through
        `express.py`, depending on self._python. The table is then scanned
        with --max-rows=2 and the output is expected to be one header line
        followed, for each row, by five lines: the info:track_plays cell
        header, a song id, the info:next_song_rec cell header, a song id,
        and an empty line.

        Raises:
            AssertionError: if a command exits non-zero or the scan prints
                fewer lines than the layout above requires.
        """
        # Equality (not truthiness) is kept on purpose: only a value equal to
        # False selects the `express` script; anything else, None included,
        # selects express.py.
        if self._python == False:
            # NOTE(review): unlike the express.py variant below
            # (-mode=hdfs), this command passes no --hdfs flag; confirm
            # that is intended.
            cmd = base.StripMargin("""
        |express job ${EXPRESS_MUSIC_JAR} \\
        |    org.kiji.express.music.SongRecommender \\
        |    --songs-table ${KIJI}/songs \\
        |    --users-table ${KIJI}/users
        """)
        else:
            cmd = base.StripMargin("""
        |express.py \\
        |    job \\
        |    -user_jar=${EXPRESS_MUSIC_JAR} \\
        |    -job_name=org.kiji.express.music.SongRecommender \\
        |    -mode=hdfs \\
        |    --songs-table ${KIJI}/songs \\
        |    --users-table ${KIJI}/users
        """)
        song_recommend = self.Command(cmd)
        assert song_recommend.exit_code == 0

        list_rows = self.Command("kiji scan ${KIJI}/users --max-rows=2")
        assert list_rows.exit_code == 0
        stripped_output = self.StripJavaHomeLine(list_rows.output_lines)

        # One pattern per output line of a scanned row, in order.
        row_patterns = (
            r"^entity-id=\['user-\d+'\] \[\d+\] info:track_plays$",
            r"^\s*song-\d+$",
            r"^entity-id=\['user-\d+'\] \[\d+\] info:next_song_rec$",
            r"^\s*song-\d+$",
            r"^$",
        )
        rows = 2
        lines_per_row = len(row_patterns)

        # Check the length before indexing, so that short or empty output
        # fails with the line count instead of an IndexError.
        assert len(stripped_output) >= lines_per_row * rows + 1, (
            len(stripped_output))
        assert stripped_output[0].startswith('Scanning kiji table: kiji://')

        for row in range(rows):
            # Offsets start at 1 because line 0 is the scan header.
            for offset, pattern in enumerate(row_patterns, start=1):
                tutorial_test.ExpectRegexMatch(
                    expect=pattern,
                    actual=stripped_output[offset + row * lines_per_row])
Ejemplo n.º 5
0
    def Part2(self):
        """Runs the importing part of the KijiExpress Music tutorial.

        Runs SongMetadataImporter against the `songs` table and
        SongPlaysImporter against the `users` table. After each import the
        table is scanned with --max-rows=5 and the output is expected to be
        one header line followed by three lines per row: a cell header, the
        cell value, and an empty line.

        http://docs.kiji.org/tutorials/express-recommendation/DEVEL/express-importing-data/

        Raises:
            AssertionError: if a command exits non-zero or a scan prints
                fewer lines than the layout above requires.
        """

        def expect_scan_rows(scan_lines, row_patterns, rows=5):
            """Checks the scan header, then `rows` groups of pattern lines."""
            lines_per_row = len(row_patterns)
            # Check the length before indexing, so that short or empty
            # output fails with the line count instead of an IndexError.
            assert len(scan_lines) >= lines_per_row * rows + 1, (
                len(scan_lines))
            assert scan_lines[0].startswith('Scanning kiji table: kiji://')
            for row in range(rows):
                # Offsets start at 1 because line 0 is the scan header.
                for offset, pattern in enumerate(row_patterns, start=1):
                    tutorial_test.ExpectRegexMatch(
                        expect=pattern,
                        actual=scan_lines[offset + row * lines_per_row])

        # --------------------------------------------------------------------------
        # Import the song metadata.

        # Equality (not truthiness) is kept on purpose: only a value equal to
        # False selects the `express` script; anything else, None included,
        # selects express.py.
        if self._python == False:
            cmd = base.StripMargin("""
          |express job \\
          |    ${EXPRESS_MUSIC_JAR} \\
          |    org.kiji.express.music.SongMetadataImporter \\
          |    --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
          |    --input ${HDFS_BASE}/express-tutorial/song-metadata.json \\
          |    --table-uri ${KIJI}/songs --hdfs
          """)
        else:
            cmd = base.StripMargin("""
          |express.py \\
          |    job \\
          |    --libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
          |    --user_jar=${EXPRESS_MUSIC_JAR} \\
          |    --job_name=org.kiji.express.music.SongMetadataImporter \\
          |    --mode=hdfs \\
          |    --input ${HDFS_BASE}/express-tutorial/song-metadata.json \\
          |    --table-uri ${KIJI}/songs
          """)

        song_metadata_import = self.Command(cmd)
        assert song_metadata_import.exit_code == 0

        # --------------------------------------------------------------------------
        # Check the imported song rows.

        list_rows = self.Command('kiji scan ${KIJI}/songs --max-rows=5')
        assert list_rows.exit_code == 0
        # Strip the first line from the output, if it is about $JAVA_HOME not set.
        stripped_output = self.StripJavaHomeLine(list_rows.output_lines)
        expect_scan_rows(
            stripped_output,
            (
                r"^entity-id=\['song-\d+'\] \[\d+\] info:metadata$",
                r"^\s*{\s*\"song_name\".*\"album_name\".*\"artist_name\".*\"genre\".*\"tempo\".*\"duration\".*\s*}\s*$",
                r"^$",
            ))

        # --------------------------------------------------------------------------
        # Import the song plays.

        if self._python == False:
            cmd = base.StripMargin("""
          |express job \\
          |    ${EXPRESS_MUSIC_JAR} \\
          |    org.kiji.express.music.SongPlaysImporter \\
          |    --libjars "${MUSIC_EXPRESS_HOME}/lib/*" \\
          |    --input ${HDFS_BASE}/express-tutorial/song-plays.json \\
          |    --table-uri ${KIJI}/users --hdfs
          """)
        else:
            cmd = base.StripMargin("""
        |express.py \\
        |    job \\
        |    -libjars="${MUSIC_EXPRESS_HOME}/lib/*" \\
        |    -user_jar=${EXPRESS_MUSIC_JAR} \\
        |    -job_name=org.kiji.express.music.SongPlaysImporter \\
        |    -mode=hdfs \\
        |    --input ${HDFS_BASE}/express-tutorial/song-plays.json \\
        |    --table-uri ${KIJI}/users
        """)
        user_data_import = self.Command(cmd)
        assert user_data_import.exit_code == 0

        # --------------------------------------------------------------------------
        # Check the imported user rows.

        list_rows = self.Command('kiji scan ${KIJI}/users --max-rows=5')
        assert list_rows.exit_code == 0
        stripped_output = self.StripJavaHomeLine(list_rows.output_lines)
        expect_scan_rows(
            stripped_output,
            (
                r"^entity-id=\['user-\d+'\] \[\d+\] info:track_plays$",
                r"^\s*song-\d+$",
                r"^$",
            ))