Beispiel #1
0
    def __init__(self, tvs, season, episode):
        """Select one episode of a TV series and load its subtitle file.

        The subtitle path and the clustering-result path are built by
        appending to the base values already available as ``self.subtlDir``
        and ``self.saveClstResult``. A summary of the selection is printed and
        the subtitle file is then loaded through ``sb.Subtitle``.

        Args:
            tvs: Series selector. ``'G'``/``'g'`` selects Game of Thrones,
                ``'B'``/``'b'`` selects Breaking Bad.
            season: Season number (anything ``int()`` and ``str()`` accept).
            episode: Episode number (anything ``int()`` and ``str()`` accept).

        Raises:
            ValueError: If ``tvs`` is not one of the supported selectors.
        """
        self.tvs = tvs
        self.season = season
        self.episode = episode
        if tvs in ('G', 'g'):
            self.subDir = self.dirFeaturesGoT
            self.subtlDir = self.subtlDir + 'GoT/English/GameOfThrones.'
            self.saveClstResult = self.saveClstResult + 'GoT_'
            GoT = True
        elif tvs in ('B', 'b'):
            self.subDir = self.dirFeaturesBB
            self.subtlDir = self.subtlDir + 'BB/English/BreakingBad.'
            self.saveClstResult = self.saveClstResult + 'BB_'
            GoT = False
        else:
            print('you did not choose a TV-Series! bye bye')
            # Stop here: continuing would leave the series-specific attributes
            # unset and fail later with an unrelated UnboundLocalError.
            raise ValueError('unknown TV series selector: %r' % (tvs,))

        # Both numbers are zero-padded to two digits; values >= 10 are used
        # as they are so that season 10 does not become "Season010".
        seasonPrefix = "Season" if int(season) >= 10 else "Season0"
        episodePrefix = "Episode" if int(episode) >= 10 else "Episode0"
        self.Season = seasonPrefix + str(season)
        self.Episode = episodePrefix + str(episode)

        self.fileName = self.Season + '.' + self.Episode
        self.subtlFile = self.subtlDir + self.fileName + '.en.srt'
        self.saveClstResult = self.saveClstResult + self.Season + 'Result.ixt'

        print('You have chosen :')
        if GoT:
            print('Tv-Series: Game of Thrones')
        else:
            print('Tv-Series: Breaking bad')
        print('Season: ', self.season)
        print('Episode:', self.episode)
        print('Subtile File : ', self.subtlFile)

        self.sub = sb.Subtitle(self.subtlFile)
Beispiel #2
0
    def analyse(self, subtitleFile, wordFrequencyDistribution):
        """Read a subtitle file cue by cue and update the frequency data.

        The first line of the file is discarded. After that the file is read
        as repeated groups of: a number line, a time line containing ``-->``,
        and text lines terminated by a blank line (or end of file). Reading
        stops at the first empty number line.

        Each cue is stored in the module-level ``nextSubtitle``. The very
        first cue only initialises ``currentSubtitle``; for every later cue
        ``self.addToFrequencyDistributionWithFreq`` is called. A text line
        starting with ``-`` that follows other text is treated as a separate
        cue with the same number and times (only the last such line of a
        group is kept) and is processed with a second call.

        Args:
            subtitleFile: Path of the subtitle file to read.
            wordFrequencyDistribution: Passed through unchanged to
                ``self.addToFrequencyDistributionWithFreq``.

        Returns:
            ``0`` if the file could not be opened, otherwise ``None``.

        Raises:
            ValueError: If a cue's time line does not contain ``-->`` (this
                includes a file that ends right after a number line), or if
                the number or the parsed times are not valid integers.
        """
        global currentSubtitle
        global nextSubtitle

        try:
            subFile = open(subtitleFile)
        except Exception:
            return 0

        # The context manager closes the file on every exit path, including
        # the parse errors raised below.
        with subFile:
            # Discard the first line of the file (the original comment
            # describes it as a "number of lines in file" marker).
            subFile.readline()

            firstRun = True

            while True:
                subNumber = subFile.readline().strip()
                if not subNumber:
                    break

                timeSub = subFile.readline()
                # readline() returns '' at end of file, never None, so the
                # separator itself is what has to be checked.
                if '-->' not in timeSub:
                    raise ValueError('Error parsing time subtitle')

                textParts = []
                newSubtitleFound = False
                nextSubtitleText = ''
                while True:
                    s = subFile.readline()
                    if s == '' or s.isspace():
                        break
                    if s.strip().startswith('-') and textParts:
                        # A dash line after other text is kept apart as its
                        # own cue text instead of being appended.
                        newSubtitleFound = True
                        nextSubtitleText = s
                    else:
                        textParts.append(s + ' ')
                subtitleString = ''.join(textParts)

                timeParts = timeSub.split('-->')
                startTime = int(self.parse(timeParts[0]))
                stopTime = int(self.parse(timeParts[1]))

                number = int(subNumber)

                nextSubtitle = Subtitle.Subtitle(number, startTime, stopTime,
                                                 subtitleString)

                if firstRun:
                    firstRun = False
                    currentSubtitle = nextSubtitle
                else:
                    self.addToFrequencyDistributionWithFreq(
                        wordFrequencyDistribution)
                    if newSubtitleFound:
                        nextSubtitle = Subtitle.Subtitle(
                            number, startTime, stopTime, nextSubtitleText)
                        self.addToFrequencyDistributionWithFreq(
                            wordFrequencyDistribution)
Beispiel #3
0
    def analyse(self, subtitleFile):
        """Read a subtitle file cue by cue and check consecutive cues.

        The file is read as repeated groups of: a number line, a time line
        containing ``-->``, and text lines terminated by a blank line (or end
        of file). Reading stops at the first number line that is empty or not
        purely digits.

        Each cue is stored in the module-level ``nextSubtitle``. The very
        first cue only initialises ``currentSubtitle``; for every later cue
        ``self.checkSubtitlesAreOk()`` is called. A text line starting with
        ``-`` that follows other text is treated as a separate cue with the
        same number and times (only the last such line of a group is kept)
        and is checked with a second call.

        Problems are reported with ``print`` rather than raised: a cue that
        cannot be parsed is skipped, while a file that cannot be opened or
        read ends the analysis.

        Args:
            subtitleFile: Path of the subtitle file to read.

        Returns:
            None.
        """
        global currentSubtitle
        global nextSubtitle

        global mostCommonWords

        global responseGroupId

        print(responseGroupId)

        try:
            print('Attempting to open ' + subtitleFile)
            subFile = open(subtitleFile, 'r')
        except Exception:
            print('Could not open, wrong encoding perhaps?')
            # Without a file there is nothing to analyse. Falling through
            # would make every loop iteration fail on the unbound handle and
            # never terminate.
            return

        firstRun = True

        # The context manager closes the file on every exit path.
        with subFile:
            while True:
                try:
                    subNumber = subFile.readline().strip()
                except Exception as e:
                    print('Could not parse sub file, exception was ' + str(e))
                    # A failing read is a file-level problem; retrying could
                    # repeat the same error forever.
                    break

                if not subNumber or not subNumber.isdigit():
                    break

                try:
                    timeSub = subFile.readline()

                    textParts = []
                    newSubtitleFound = False
                    nextSubtitleText = ''
                    while True:
                        s = subFile.readline()
                        if s == '' or s.isspace():
                            break
                        if s.strip().startswith('-') and textParts:
                            # A dash line after other text is kept apart as
                            # its own cue text instead of being appended.
                            newSubtitleFound = True
                            nextSubtitleText = s
                        else:
                            textParts.append(s + ' ')
                    subtitleString = ''.join(textParts)

                    # Validated only after the cue text has been consumed so
                    # that the next iteration starts at the following cue.
                    # readline() returns '' at end of file, never None.
                    timeParts = timeSub.split('-->')
                    if len(timeParts) < 2:
                        raise ValueError('Error parsing time subtitle')

                    startTime = int(self.parse(timeParts[0]))
                    stopTime = int(self.parse(timeParts[1]))

                    number = int(subNumber)

                    nextSubtitle = Subtitle.Subtitle(
                        number, startTime, stopTime, subtitleString)

                    if firstRun:
                        firstRun = False
                        currentSubtitle = nextSubtitle
                    else:
                        self.checkSubtitlesAreOk()
                        if newSubtitleFound:
                            nextSubtitle = Subtitle.Subtitle(
                                number, startTime, stopTime,
                                nextSubtitleText)
                            self.checkSubtitlesAreOk()

                except Exception as e:
                    print('Could not parse sub, exception was ' + str(e))