Esempio n. 1
0
def calcScore():
    """Score each subtitle entry by its word counts and save the scores as JSON.

    Loads the subtitle entries from ``../subtitles/el.srt``, tokenizes each
    entry's ``'text'``, lower-cases the tokens, drops stop words and the
    punctuation tokens ``.`` and ``,``, and counts what is left.  Entries with
    no remaining words are skipped.  Each word-count dict is passed to
    ``get_results``; the returned mapping is tagged with the entry's
    ``'time'`` and collected.  The collected list is written to
    ``data/emp.json`` (relative to the current working directory).

    Takes no arguments and returns ``None``; progress is reported on stdout.
    """
    # A set gives constant-time membership tests; the stop collection is
    # consulted once per token.
    # NOTE(review): "scene change" is two words, so it can never equal a
    # single token produced by word_tokenize -- kept only to preserve the
    # original stop list; confirm what it was meant to filter.
    stop = set(stopwords.words('english') + ["scene change"] + ['.', ','])
    subtitle_list = get_subtitle_list_from_file("../subtitles/el.srt")
    print("Subtitles Parsed...")
    score = list()
    for el in subtitle_list:
        words = dict()
        for to in word_tokenize(el['text']):
            # Lower-case BEFORE the stop-word test: the stop list is
            # lower-case, so testing the raw token let capitalised stop
            # words ("The", "And") through to be counted as "the", "and".
            to = to.lower()
            if to in stop:
                continue
            words[to] = words.get(to, 0) + 1
        if not words:
            # Nothing but stop words / punctuation in this entry.
            continue
        ret = get_results(words)
        # presumably get_results returns a JSON-serialisable dict -- it is
        # item-assigned here and dumped below; verify against its definition.
        ret['time'] = el['time']
        score.append(ret)
    print("Scoring Done...")
    with open("data/emp.json", "w+") as f:
        json.dump(score, f)
        print("Saved.")
Esempio n. 2
0
def calcScore():
    """Compute per-subtitle word-frequency scores and dump them to JSON.

    For every entry returned by ``get_subtitle_list_from_file`` for
    ``../subtitles/el.srt``:

    1. tokenize ``entry['text']`` with ``word_tokenize`` and lower-case
       every token;
    2. discard tokens found in the stop collection (English stop words plus
       ``.`` and ``,``);
    3. count the surviving tokens; skip the entry if none survive;
    4. hand the counts to ``get_results`` and attach the entry's ``'time'``
       to the returned mapping.

    The list of results is written to ``data/emp.json`` relative to the
    current working directory.  Returns ``None``.
    """
    # Built once as a set so each membership test is O(1) instead of a list
    # scan.
    # NOTE(review): the two-word entry "scene change" cannot match any single
    # token; it is retained unchanged from the original stop list -- confirm
    # the intended behaviour.
    stop = set(stopwords.words('english') + ["scene change"] + ['.', ','])
    subtitle_list = get_subtitle_list_from_file("../subtitles/el.srt")
    print("Subtitles Parsed...")
    score = []
    for el in subtitle_list:
        # Normalise case first, then filter: filtering the raw tokens missed
        # capitalised stop words such as "The", which were then lower-cased
        # and counted anyway.
        lowered = [raw.lower() for raw in word_tokenize(el['text'])]
        words = {}
        for to in lowered:
            if to not in stop:
                words[to] = words.get(to, 0) + 1
        if not words:
            # Entry contained only stop words / punctuation.
            continue
        ret = get_results(words)
        # presumably a JSON-serialisable dict (it is item-assigned and then
        # dumped) -- verify against get_results.
        ret['time'] = el['time']
        score.append(ret)
    print("Scoring Done...")
    with open("data/emp.json", "w+") as f:
        json.dump(score, f)
        print("Saved.")
Esempio n. 3
0
def fill_dialogues_times():
    """Store the nearest subtitle time on every dialogue row of ``scriptData``.

    Loads the Gladiator subtitle list, selects every ``scriptData`` row whose
    ``typ`` is ``"dialogue"``, and for each row asks ``find_nearest_time`` for
    a time matching the row's ``content``.  When the result is not the empty
    string, the row's ``time`` column is updated and the row's ``inde`` and
    the time are printed.

    NOTE(review): no commit is issued in the visible part of this function;
    whether the updates persist depends on the connection's autocommit
    setting or on code outside this view -- confirm.
    """
    # TODO[@9310gaurav] fix the sql syntax according to the update table syntax
    subtitle_list = get_subtitle_list_from_file("/Users/arkanath/Dropbox/IIT-Kgp_Coursework/NLP/NLP_Project/flixify/subtitles/gladiator.srt")
    db = mdb.connect(host="10.5.18.68", user="******", passwd="dual12", db="12CS30010", cursorclass=curs.DictCursor, charset='utf8')
    cur = db.cursor()
    cur.execute('select * from scriptData where typ="dialogue"')
    data = cur.fetchall()
    for entry in data:
        # Named nearest_time rather than `time` to avoid shadowing the
        # stdlib module name.
        nearest_time = find_nearest_time(subtitle_list, entry['content'])
        if nearest_time != "":
            # Single formatted argument prints the same "inde time" line
            # whether print is a statement or a function.
            print("%s %s" % (entry['inde'], nearest_time))
            # Parameterised query: the driver quotes/escapes the values, so
            # quotes inside them cannot break the statement, and a
            # non-string `inde` no longer raises TypeError on concatenation.
            cur.execute(
                'update scriptData set time=%s where inde=%s',
                (nearest_time, entry['inde']),
            )