Ejemplo n.º 1
0
def wiki_to_text(username, password, outputdir, sourcefile):
    """Fetch all lyrics listed in *sourcefile* and dump each to a text file.

    The lyrics are retrieved through ``kuplett_parser.get_all_lyrics`` and
    the n-th lyric is written to ``outputdir/<generic name of the n-th url>``,
    where the urls come from the ``"urls"`` entry of the data loaded from
    *sourcefile*. Missing parent directories are created on demand.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outputdir: Directory prefix for the written files.
        sourcefile: Data file handed to ``kuplett_parser``.

    Returns:
        The lyrics as returned by ``kuplett_parser.get_all_lyrics``.
    """
    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)
    urls = kuplett_parser.load_data(sourcefile).get("urls")

    for index, lyric in enumerate(lyrics):
        # The lyrics and the urls are assumed to be in the same order, so the
        # position of a lyric selects the url that names its output file.
        filename = outputdir + "/" + kuplett_parser.get_generic_name(
            urls[index])

        directory = os.path.dirname(filename)
        if directory.strip() and not os.path.exists(directory):
            os.makedirs(directory)

        # The context manager guarantees the file is flushed and closed even
        # if writing fails; the original never closed it.
        with codecs.open(filename, 'w', 'utf-8') as outfile:
            for line in lyric:
                outfile.write(line + "\n")
    return lyrics
def kupletter_to_inputsong(username, password, outdirname):
    """Convert every lyric listed in ``data_2017.txt`` to an inputsong file.

    Each lyric is written to ``outdirname/Akt<N>/<generic name>.txt``. The
    file starts with five metadata lines (title, melody, author,
    participants, arrangement) and a blank line, followed by the lyric lines
    converted by ``Raw_to_inputsong_parser.parse_line_to_inputsong``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outdirname: Root directory under which the ``Akt<N>`` folders are
            created.
    """
    sourcefile = "data_2017.txt"
    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)
    # Parse read text and write to an inputsong file.
    inputsong_parser = Raw_to_inputsong_parser()

    # Various short forms of the different styles.
    # Note that keys are cast to lowercase, and hence no upper case version
    # is needed.
    data = kuplett_parser.load_data(sourcefile)
    inputsong_parser.style_dictionary = data.get("dictionary")
    inputsong_parser.short_style_dictionary = data.get("shortdictionary")
    print(inputsong_parser.short_style_dictionary)

    delimiter = ':'  # separates singer from lyrics

    # Build the relative output path of every song; reuse the data loaded
    # above instead of parsing the source file a second time.
    outfilenames = []
    akt = 1
    for page in data.get("urls"):
        name = kuplett_parser.get_generic_name(page)
        outfilenames.append("Akt" + str(akt) + "/" + name + ".txt")
        if "final" in name.lower():
            # This is kind of dangerous. Assumes that every song with 'final'
            # in its name is the last song of an akt (part) of the spex.
            akt += 1

    for index, lyric in enumerate(lyrics):
        outfilename = outdirname + "/" + outfilenames[index]

        # Everything before the last "/" is the folder that must exist.
        folder = outfilename
        if folder.find("/") >= 0:
            folder = folder[:folder.rfind("/")]
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                # Best effort, as before; a real failure still surfaces when
                # the file is opened below. The message is now actually
                # formatted instead of printing the raw "%s" placeholder.
                print("Skipping creation of %s because it exists already."
                      % folder)

        # First we go through the lyric to get metadata that's
        # potentially been sprinkled throughout the text (bad spexare!)
        meta = get_metadata(lyric, delimiter)

        # The context manager closes the file even if parsing raises.
        with codecs.open(outfilename, 'w', 'utf-8') as outfile:
            # Header order: title, melody, author, participants, arrangement.
            header_lines = (meta.titel, meta.melodi, meta.forf, meta.medv,
                            meta.arr)
            for header_line in header_lines:
                outfile.write(header_line + "\n")
            outfile.write("\n")

            # If the first line does not have a singer we will interpret it
            # and following lines as if everyone is singing.
            # NOTE: empty lines are interpreted as belonging to the previous
            # singer, but we manually override this for the first line only.
            inputsong_parser.empty_style = "Alla"

            for line in preprocess_inputsong(lyric, delimiter):
                inputsong_line = inputsong_parser.parse_line_to_inputsong(
                    line, delimiter)
                outfile.write(inputsong_line + "\n")
Ejemplo n.º 3
0
def get_diff_filename(sourcefile):
    """Return the ``diff/``-prefixed path built from *sourcefile*'s generic name."""
    generic_name = kuplett_parser.get_generic_name(sourcefile)
    return "diff/" + generic_name
Ejemplo n.º 4
0
def get_diff_filename(sourcefile):
    """Build the diff file path for *sourcefile*.

    The path is ``diff/`` followed by whatever
    ``kuplett_parser.get_generic_name`` returns for *sourcefile*.
    """
    diff_prefix = "diff/"
    return diff_prefix + kuplett_parser.get_generic_name(sourcefile)
def kupletter_to_inputsong(username, password, outdirname):
    """Convert every lyric listed in ``data_2016.txt`` to an inputsong file.

    Each lyric is written to ``outdirname/Akt<N>/<generic name>.txt``. The
    file starts with five metadata lines (title, melody, author,
    participants, arrangement) and a blank line, followed by the lyric lines
    converted by ``Raw_to_inputsong_parser.parse_line_to_inputsong``.

    Args:
        username: Passed through to ``kuplett_parser.get_all_lyrics``.
        password: Passed through to ``kuplett_parser.get_all_lyrics``.
        outdirname: Root directory under which the ``Akt<N>`` folders are
            created.
    """
    sourcefile = "data_2016.txt"
    lyrics = kuplett_parser.get_all_lyrics(sourcefile, username, password)
    # Parse read text and write to an inputsong file.
    inputsong_parser = Raw_to_inputsong_parser()

    # Various short forms of the different styles.
    # Note that keys are cast to lowercase, and hence no upper case version
    # is needed.
    data = kuplett_parser.load_data(sourcefile)
    inputsong_parser.style_dictionary = data.get("dictionary")
    inputsong_parser.short_style_dictionary = data.get("shortdictionary")
    print(inputsong_parser.short_style_dictionary)

    delimiter = ':'  # separates singer from lyrics

    # Build the relative output path of every song; reuse the data loaded
    # above instead of parsing the source file a second time.
    outfilenames = []
    akt = 1
    for page in data.get("urls"):
        name = kuplett_parser.get_generic_name(page)
        outfilenames.append("Akt" + str(akt) + "/" + name + ".txt")
        if "final" in name.lower():
            # This is kind of dangerous. Assumes that every song with 'final'
            # in its name is the last song of an akt (part) of the spex.
            akt += 1

    for index, lyric in enumerate(lyrics):
        outfilename = outdirname + "/" + outfilenames[index]

        # Everything before the last "/" is the folder that must exist.
        folder = outfilename
        if folder.find("/") >= 0:
            folder = folder[:folder.rfind("/")]
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError:
                # Best effort, as before; a real failure still surfaces when
                # the file is opened below. The message is now actually
                # formatted instead of printing the raw "%s" placeholder.
                print("Skipping creation of %s because it exists already."
                      % folder)

        # First we go through the lyric to get metadata that's
        # potentially been sprinkled throughout the text (bad spexare!)
        meta = get_metadata(lyric, delimiter)

        # The context manager closes the file even if parsing raises.
        with codecs.open(outfilename, 'w', 'utf-8') as outfile:
            # Header order: title, melody, author, participants, arrangement.
            header_lines = (meta.titel, meta.melodi, meta.forf, meta.medv,
                            meta.arr)
            for header_line in header_lines:
                outfile.write(header_line + "\n")
            outfile.write("\n")

            # If the first line does not have a singer we will interpret it
            # and following lines as if everyone is singing.
            # NOTE: empty lines are interpreted as belonging to the previous
            # singer, but we manually override this for the first line only.
            inputsong_parser.empty_style = "Alla"

            for line in preprocess_inputsong(lyric, delimiter):
                inputsong_line = inputsong_parser.parse_line_to_inputsong(
                    line, delimiter)
                outfile.write(inputsong_line + "\n")