Example #1
# Fragment of a PyQt window class (the enclosing class definition and the
# Qt Designer-generated setupUi are not included here).
    def __init__(self, parent=None):
        super(moviedata, self).__init__(parent)
        self.setupUi(self)
        self.setWindowTitle('电影数据')  # window title: "Movie Data"
        self.radioButton.clicked.connect(self.search)
        # Child windows for the individual charts (classes defined elsewhere).
        self.cld = cloud()
        self.rac = roleactor()
        self.box = boxchange()
        self.qua = quater()
        self.mon = month()
        # Buttons are wired to handler methods (self.cloud, self.roleactor, ...)
        # defined elsewhere in the class.
        self.pushButton.clicked.connect(self.cloud)
        self.pushButton_2.clicked.connect(self.roleactor)
        self.pushButton_3.clicked.connect(self.boxchange)
        self.pushButton_4.clicked.connect(self.quater)
        self.pushButton_5.clicked.connect(self.month)
def pre_WHT1(url):
    """Prints Text Output for a given URL from Whitehouse Speeches and Remarks"""

    import urllib2, sys, random
    import os
    import subprocess  # needed for the `open` calls below
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(urllib2.urlopen(url).read(), "html.parser")

    # Get URL
    url2 = "Cite: \n" + url + "\n"

    # Get Date
    Date = soup.find("div", {"class": "date"})
    raw_date = Date.get_text()
    date = raw_date.replace(' ', '', 12)

    # Get Release
    Release = soup.find("div", {"class": "release"})
    raw_release = Release.get_text()
    release = raw_release.replace(' ', '', 12) + "\n\n"

    # Get Title
    Title = soup.find("h1", {"property": "dc:title"})
    title = Title.get_text()

    # Get Paragraph Body
    content = soup.find("div", {"id": "content"})
    paragraph = ["".join(x.findAll(text=True)) for x in content.findAll("p")]
    paragraph_body = "\n\n%s" % ("\n\n".join(paragraph))

    #Get File ID - Date & Time

    #Date - RAW
    date_split = date.split(' ')
    month_raw = date_split[0]
    day_raw = date_split[1]
    year_raw = date_split[2]

    #MonthID
    month_clean1 = month_raw.replace(' ', '')
    month_clean2 = month_clean1.replace('\n', '')
    try:
        month_id = month(month_clean2)
    except:
        month_id = month_clean2

    #DayID
    day_clean1 = day_raw.replace(',', '')
    day_clean2 = day_clean1.replace(' ', '')
    day_clean3 = day_clean2.replace('\n', '')
    day_id = day_clean3

    #YearID
    year_clean1 = year_raw.replace(' ', '')
    year_clean2 = year_clean1.replace('\n', '')
    year_id = year_clean2

    #Final DateID
    date_id = year_id + '-' + month_id + '-' + day_id

    #Random ID
    randID1 = str(random.randrange(6, 10000, 1))
    randID2 = str(random.randrange(6, 10000, 1))

    try:
        path1 = date_id + "_" + "ID1" + ".txt"
        path2 = date_id + "_" + "ID2" + ".txt"
        path3 = date_id + "_" + "ID3" + ".txt"
        path4 = date_id + "_" + "ID4" + ".txt"
        path5 = date_id + "_" + "ID5" + ".txt"
        if os.path.isfile(path1) == False:
            #print "no file ID1 found, create ID1"
            f = open(date_id + "_" + "ID1" + ".txt", 'w')
            f.write(url2.encode('utf-8'))
            f.write(date.encode('utf-8'))
            f.write(release.encode('utf-8'))
            f.write(title.encode('utf-8'))
            f.write(paragraph_body.encode('utf-8'))
            f.close()
            open_call1 = "open " + path1
            subprocess.call(open_call1, shell=True)
            return

        elif os.path.isfile(path1) == True:
            #print "found file ID1, check for ID2"
            if os.path.isfile(path2) == False:
                print "found ID1, no file ID2 found, make ID2"
                f = open(date_id + "_" + "ID2" + ".txt", 'w')
                f.write(url2.encode('utf-8'))
                f.write(date.encode('utf-8'))
                f.write(release.encode('utf-8'))
                f.write(title.encode('utf-8'))
                f.write(paragraph_body.encode('utf-8'))
                f.close()
                open_call2 = "open " + path2
                subprocess.call(open_call2, shell=True)
                return
            elif os.path.isfile(path2) == True:
                #print "found file ID2, check for ID3"
                if os.path.isfile(path3) == False:
                    print "found IDs 1-2, no file ID3 found, make ID3"
                    f = open(date_id + "_" + "ID3" + ".txt", 'w')
                    f.write(url2.encode('utf-8'))
                    f.write(date.encode('utf-8'))
                    f.write(release.encode('utf-8'))
                    f.write(title.encode('utf-8'))
                    f.write(paragraph_body.encode('utf-8'))
                    f.close()
                    open_call3 = "open " + path3
                    subprocess.call(open_call3, shell=True)
                    return
                elif os.path.isfile(path3) == True:
                    #print "found file ID3, check for ID4"
                    if os.path.isfile(path4) == False:
                        print "found IDs 1-3, no file ID4 found, make ID4"
                        f = open(date_id + "_" + "ID4" + ".txt", 'w')
                        f.write(url2.encode('utf-8'))
                        f.write(date.encode('utf-8'))
                        f.write(release.encode('utf-8'))
                        f.write(title.encode('utf-8'))
                        f.write(paragraph_body.encode('utf-8'))
                        f.close()
                        open_call4 = "open " + path4
                        subprocess.call(open_call4, shell=True)
                        return
                    elif os.path.isfile(path4) == True:
                        #print "found file ID4, check for ID5"
                        if os.path.isfile(path5) == False:
                            print "found IDs 1-4, no file ID5 found, make ID5"
                            f = open(date_id + "_" + "ID5" + ".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(date.encode('utf-8'))
                            f.write(release.encode('utf-8'))
                            f.write(title.encode('utf-8'))
                            f.write(paragraph_body.encode('utf-8'))
                            f.close()
                            open_call5 = "open " + path5
                            subprocess.call(open_call5, shell=True)
                            return
                        elif os.path.isfile(path5) == True:
                            print "found IDs 1-5, create random ID"
                            f = open(
                                date_id + "_" + "ID" + randID1 + "-" +
                                randID2 + ".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(date.encode('utf-8'))
                            f.write(release.encode('utf-8'))
                            f.write(title.encode('utf-8'))
                            f.write(paragraph_body.encode('utf-8'))
                            f.close()
                            return

    finally:
        pass
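
# Every parser in this file calls a month() helper that is not defined here.
# A minimal sketch of what it is assumed to do (map an English month name to
# a zero-padded month number); the real helper may differ:
def month(name):
    """Assumed helper: 'January' -> '01', ..., 'December' -> '12'."""
    names = ['January', 'February', 'March', 'April', 'May', 'June',
             'July', 'August', 'September', 'October', 'November', 'December']
    return str(names.index(name) + 1).zfill(2)
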
def pre_WHT3(url):
    """Prints Text Output for a given URL from Whitehouse Speeches and Remarks"""
    
    import urllib2, sys, random
    import os
    from bs4 import BeautifulSoup
    from dateutil.parser import parse  # parse() below is assumed to be dateutil's

    soup = BeautifulSoup(urllib2.urlopen(url).read(), "html.parser")


    # Get URL
    url2 = "Cite: \n"+url+"\n"

    # Get Date
    Date = soup.find("meta", {"property":"article:published_time"})
    raw_date = str(Date).split('"', 2)[1][0:10]
    date = '\n'+raw_date+'\n'
    date_reform = parse(str(date)).strftime('%B %d, %Y')
    date_reformat = '\n'+date_reform+'\n'


    # Get Release
    Release = soup.find("meta", {"property":"og:title"})
    raw_release = str(Release)
    release = ('\nThe White House\nOffice of the Press Secretary\n\n'
               'For Immediate Release' + date_reformat)

    # Get Title
    Title = soup.find("meta", {"property":"og:title"})
    raw_title = str(Title).split('"', 2)[1]
    title = '\n'+raw_title+'\n'

    #Get File ID - Date & Time
    #Date - RAW
    date_split = date.split('-')
    month_raw = date_split[1]
    day_raw = date_split[2]
    year_raw = date_split[0]

    #MonthID
    month_clean1 = month_raw.replace(' ', '')
    month_clean2 = month_clean1.replace('\n', '')
    try:
        month_id = month(month_clean2)
    except:
        month_id = month_clean2

    #DayID
    day_clean1 = day_raw.replace(',', '')
    day_clean2 = day_clean1.replace(' ', '')
    day_clean3 = day_clean2.replace('\n', '')
    day_id = day_clean3

    #YearID
    year_clean1 = year_raw.replace(' ', '')
    year_clean2 = year_clean1.replace('\n', '')
    year_id = year_clean2

    #Final DateID
    date_id = year_id+'-'+month_id+'-'+day_id

    #Define Content
    content = soup.find("div", {"class":"field-items"})

    #Get Paragraph1p
    paragraph1p = ["".join(x.findAll(text=True)) for x in content.findAll("p")]
    paragraph_body1p = "\n\n%s" % ("\n\n".join(paragraph1p))

    #Get Paragraph1div
    paragraph1div = ["".join(x.findAll(text=True)) for x in content.findAll("div")]
    paragraph_body1div = "\n\n%s" % ("\n\n".join(paragraph1div))

    # Get Paragraph2
    paragraph2 = ["".join(x.findAll(text=True)) for x in content.findAll("div", {"class":"legacy-para"})]
    paragraph_body2lp = "\n\n%s" % ("\n\n".join(paragraph2))

    
    # Get Paragraph2p
    paragraph2p = ["".join(x.findAll(text=True)) for x in content.findAll("p")]


    # Test ID - Div - Legacy Para
    test_2 = paragraph_body2lp.replace(' ', '').replace('\n', '')

    # Test ID - 1p
    test_1p = paragraph_body1p.replace(' ', '').replace('\n', '')

    # Test ID - 1div
    test_1div = paragraph_body1div.replace(' ', '').replace('\n', '')

    try:
        # The empty case must be checked before the length tests, since
        # len('') < 400 would otherwise swallow it.
        if test_2 == '':
            print "paragraph body 2 empty"
            if len(test_1p) > 400:
                paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1p))
            else:
                paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))
            paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p))

        elif len(test_2) < 400:
            #print "paragraph body 2 short"
            paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p))
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))

        elif len(test_2) > 400:
            paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2))
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))

        else:
            print "else"
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))
            paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p))

    except:
        print "except"
        paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2p))

        # Get Paragraph_Body1
        if test_1p == '':
            if test_1div != '':
                paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))
            else:
                print "paragraph_body1p and paragraph_body1div empty"
                paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))
        elif test_1p != '':
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1p))
        else:
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1div))
            print "paragraph_body1 empty"

    # Perform Quality Check on Parsed Speech
    speech_parser_two_para_QC(url, paragraph_body1, paragraph_body2)

    #Random ID
    randID1 = str(random.randrange(6, 10000, 1))
    randID2 = str(random.randrange(6, 10000, 1))



    try:
        path1 = date_id+"_"+"ID1"+".txt"
        path2 = date_id+"_"+"ID2"+".txt"
        path3 = date_id+"_"+"ID3"+".txt"
        path4 = date_id+"_"+"ID4"+".txt"
        path5 = date_id+"_"+"ID5"+".txt"
        if os.path.isfile(path1) == False:
            f = open(date_id+"_"+"ID1"+".txt", 'w')
            f.write(url2.encode('utf-8'))
            f.write(paragraph_body1.encode('utf-8'))
            f.write(paragraph_body2.encode('utf-8'))
            f.close()
            return

        elif os.path.isfile(path1) == True:
            if os.path.isfile(path2) == False:
                print "found ID1, no file ID2 found, make ID2"
                f = open(date_id+"_"+"ID2"+".txt", 'w')
                f.write(url2.encode('utf-8'))
                f.write(paragraph_body1.encode('utf-8'))
                f.write(paragraph_body2.encode('utf-8'))
                f.close()
                return
            elif os.path.isfile(path2) == True:
                if os.path.isfile(path3) == False:
                    print "found IDs 1-2, no file ID3 found, make ID3"
                    f = open(date_id+"_"+"ID3"+".txt", 'w')
                    f.write(url2.encode('utf-8'))
                    f.write(paragraph_body1.encode('utf-8'))
                    f.write(paragraph_body2.encode('utf-8'))
                    f.close()
                    return
                elif os.path.isfile(path3) == True:
                    if os.path.isfile(path4) == False:
                        print "found IDs 1-3, no file ID4 found, make ID4"
                        f = open(date_id+"_"+"ID4"+".txt", 'w')
                        f.write(url2.encode('utf-8'))
                        f.write(paragraph_body1.encode('utf-8'))
                        f.write(paragraph_body2.encode('utf-8'))
                        f.close()
                        return
                    elif os.path.isfile(path4) == True:
                        if os.path.isfile(path5) == False:
                            print "found IDs 1-4, no file ID5 found, make ID5"
                            f = open(date_id+"_"+"ID5"+".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(paragraph_body1.encode('utf-8'))
                            f.write(paragraph_body2.encode('utf-8'))
                            f.close()
                            return
                        elif os.path.isfile(path5) == True:
                            print "found IDs 1-5, create random ID"
                            f = open(date_id+"_"+"ID"+randID1+"-"+randID2+".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(paragraph_body1.encode('utf-8'))
                            f.write(paragraph_body2.encode('utf-8'))
                            f.close()
                            return 

        
    finally:
        pass
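
# The nested isfile() checks in each parser implement the same pattern: write
# to the first unused "<date_id>_IDn.txt" file, falling back to a random ID
# once ID1-ID5 all exist. A compact sketch of that pattern with a
# hypothetical helper name (not part of the original code):
def next_free_path(date_id, limit=5):
    """Sketch: first unused '<date_id>_IDn.txt', or a random-ID path."""
    import os, random
    for n in range(1, limit + 1):
        path = date_id + "_ID" + str(n) + ".txt"
        if not os.path.isfile(path):
            return path
    rand_id = str(random.randrange(6, 10000)) + "-" + str(random.randrange(6, 10000))
    return date_id + "_ID" + rand_id + ".txt"
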
def pre_WHT3(url):
    """Prints Text Output for a given URL from Whitehouse Speeches and Remarks"""

    import urllib2, sys, random
    import os
    import subprocess  # needed for the `open` calls below
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(urllib2.urlopen(url).read(), "html.parser")

    # Get URL
    url2 = "Cite: \n" + url + "\n"

    try:
        # Get Paragraph2
        content2 = soup.find("div", {"class": "legacy-content"})
        paragraph2 = [
            "".join(x.findAll(text=True)) for x in content2.findAll("div")
        ]

        #Pargraph Body2
        paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2))

        # Test ID
        test_raw = paragraph_body2
        test1 = test_raw.replace(' ', '')
        test_id = test1.replace('\n', '')

        if test_id == '':
            print "paragraph body 2 empty"
            content1 = soup.find("div", {"id": "content"})
            paragraph1 = [
                "".join(x.findAll(text=True)) for x in content1.findAll("p")
            ]
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1))
            try:
                # paragraph_body2 is empty in this branch, so the date split
                # below usually raises and the URL-based date_id fallback in
                # the except clause is used instead.
                date_block_raw = paragraph_body2[0:200]
                date_block_clean1 = date_block_raw.replace('_', '')
                date_block_clean2 = date_block_clean1.replace('\n', ' ')
                date_block_clean3 = date_block_clean2.replace(u'\xa0', '')
                date_block_clean4 = date_block_clean3.replace(' ', '', 16)

                #Date - RAW
                date_split = date_block_clean4.split(' ')
                month_raw = date_split[3]
                day_raw = date_split[4]
                year_raw = date_split[5]

                #MonthID
                month_clean1 = month_raw.replace(' ', '')
                month_clean2 = month_clean1.replace('\n', '')
                try:
                    month_id = month(month_clean2)
                except:
                    month_id = month_clean2

                #DayID
                day_clean1 = day_raw.replace(',', '')
                day_clean2 = day_clean1.replace(' ', '')
                day_clean3 = day_clean2.replace('\n', '')
                day_id = day_clean3

                #YearID
                year_clean1 = year_raw.replace(' ', '')
                year_clean2 = year_clean1.replace('\n', '')
                year_id = year_clean2

                #Final DateID
                date_id = year_id + '-' + month_id + '-' + day_id

            except:
                #DateID
                date_id = "2009-2010" + '-' + url[60:75]
                pass

        elif len(test_id) < 400:
            print "paragraph body 2 not correct"
            content1 = soup.find("div", {"id": "content"})
            paragraph1 = [
                "".join(x.findAll(text=True)) for x in content1.findAll("p")
            ]
            paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1))
            try:
                date_block_raw = paragraph_body2[0:200]
                date_block_clean1 = date_block_raw.replace('_', '')
                date_block_clean2 = date_block_clean1.replace('\n', ' ')
                date_block_clean3 = date_block_clean2.replace(u'\xa0', '')
                date_block_clean4 = date_block_clean3.replace(' ', '', 16)

                #Date - RAW
                date_split = date_block_clean4.split(' ')
                month_raw = date_split[3]
                day_raw = date_split[4]
                year_raw = date_split[5]

                #MonthID
                month_clean1 = month_raw.replace(' ', '')
                month_clean2 = month_clean1.replace('\n', '')
                try:
                    month_id = month(month_clean2)
                except:
                    month_id = month_clean2

                #DayID
                day_clean1 = day_raw.replace(',', '')
                day_clean2 = day_clean1.replace(' ', '')
                day_clean3 = day_clean2.replace('\n', '')
                day_id = day_clean3

                #YearID
                year_clean1 = year_raw.replace(' ', '')
                year_clean2 = year_clean1.replace('\n', '')
                year_id = year_clean2

                #Final DateID
                date_id = year_id + '-' + month_id + '-' + day_id

            except:
                #DateID
                date_id = "2009-2010" + '-' + url[60:75]
                pass

        else:
            print "else"
            paragraph_body1 = ' '

            try:
                date_block_raw = paragraph_body2[0:200]
                date_block_clean1 = date_block_raw.replace('_', '')
                date_block_clean2 = date_block_clean1.replace('\n', ' ')
                date_block_clean3 = date_block_clean2.replace(u'\xa0', '')
                date_block_clean4 = date_block_clean3.replace(' ', '', 16)

                #Date - RAW
                date_split = date_block_clean4.split(' ')
                month_raw = date_split[3]
                day_raw = date_split[4]
                year_raw = date_split[5]

                #MonthID
                month_clean1 = month_raw.replace(' ', '')
                month_clean2 = month_clean1.replace('\n', '')
                try:
                    month_id = month(month_clean2)
                except:
                    month_id = month_clean2

                #DayID
                day_clean1 = day_raw.replace(',', '')
                day_clean2 = day_clean1.replace(' ', '')
                day_clean3 = day_clean2.replace('\n', '')
                day_id = day_clean3

                #YearID
                year_clean1 = year_raw.replace(' ', '')
                year_clean2 = year_clean1.replace('\n', '')
                year_id = year_clean2

                #Final DateID
                date_id = year_id + '-' + month_id + '-' + day_id

            except:
                #DateID
                date_id = "2009-2010" + '-' + url[60:75]
                pass

    except:
        # Get Paragraph1
        content1 = soup.find("div", {"id": "content"})
        paragraph1 = [
            "".join(x.findAll(text=True)) for x in content1.findAll("p")
        ]

        # Get Paragraph Body1
        paragraph_body1 = "\n\n%s" % ("\n\n".join(paragraph1))

        # Test ID
        test_raw = paragraph_body1
        test1 = test_raw.replace(' ', '')
        test_id = test1.replace('\n', '')

        if test_id == '':
            content2 = soup.find("div", {"class": "legacy-content"})
            paragraph2 = [
                "".join(x.findAll(text=True)) for x in content2.findAll("div")
            ]
            paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2))
        elif len(test_id) < 400:
            content2 = soup.find("div", {"class": "legacy-content"})
            paragraph2 = [
                "".join(x.findAll(text=True)) for x in content2.findAll("div")
            ]
            paragraph_body2 = "\n\n%s" % ("\n\n".join(paragraph2))
        else:
            paragraph_body2 = ' '

        #DateID
        date_id = "2009-2010" + '-' + url[60:75]

    #Random ID
    randID1 = str(random.randrange(6, 10000, 1))
    randID2 = str(random.randrange(6, 10000, 1))

    try:
        path1 = date_id + "_" + "ID1" + ".txt"
        path2 = date_id + "_" + "ID2" + ".txt"
        path3 = date_id + "_" + "ID3" + ".txt"
        path4 = date_id + "_" + "ID4" + ".txt"
        path5 = date_id + "_" + "ID5" + ".txt"
        if os.path.isfile(path1) == False:
            #print "no file ID1 found, create ID1"
            f = open(date_id + "_" + "ID1" + ".txt", 'w')
            f.write(url2.encode('utf-8'))
            f.write(paragraph_body1.encode('utf-8'))
            f.write(paragraph_body2.encode('utf-8'))
            f.close()
            open_call1 = "open " + path1
            subprocess.call(open_call1, shell=True)
            return

        elif os.path.isfile(path1) == True:
            #print "found file ID1, check for ID2"
            if os.path.isfile(path2) == False:
                print "found ID1, no file ID2 found, make ID2"
                f = open(date_id + "_" + "ID2" + ".txt", 'w')
                f.write(url2.encode('utf-8'))
                f.write(paragraph_body1.encode('utf-8'))
                f.write(paragraph_body2.encode('utf-8'))
                f.close()
                open_call2 = "open " + path2
                subprocess.call(open_call2, shell=True)
                return
            elif os.path.isfile(path2) == True:
                #print "found file ID2, check for ID3"
                if os.path.isfile(path3) == False:
                    print "found IDs 1-2, no file ID3 found, make ID3"
                    f = open(date_id + "_" + "ID3" + ".txt", 'w')
                    f.write(url2.encode('utf-8'))
                    f.write(paragraph_body1.encode('utf-8'))
                    f.write(paragraph_body2.encode('utf-8'))
                    f.close()
                    open_call3 = "open " + path3
                    subprocess.call(open_call3, shell=True)
                    return
                elif os.path.isfile(path3) == True:
                    #print "found file ID3, check for ID4"
                    if os.path.isfile(path4) == False:
                        print "found IDs 1-3, no file ID4 found, make ID4"
                        f = open(date_id + "_" + "ID4" + ".txt", 'w')
                        f.write(url2.encode('utf-8'))
                        f.write(paragraph_body1.encode('utf-8'))
                        f.write(paragraph_body2.encode('utf-8'))
                        f.close()
                        open_call4 = "open " + path4
                        subprocess.call(open_call4, shell=True)
                        return
                    elif os.path.isfile(path4) == True:
                        #print "found file ID4, check for ID5"
                        if os.path.isfile(path5) == False:
                            print "found IDs 1-4, no file ID5 found, make ID5"
                            f = open(date_id + "_" + "ID5" + ".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(paragraph_body1.encode('utf-8'))
                            f.write(paragraph_body2.encode('utf-8'))
                            f.close()
                            open_call5 = "open " + path5
                            subprocess.call(open_call5, shell=True)
                            return
                        elif os.path.isfile(path5) == True:
                            print "found IDs 1-5, create random ID"
                            f = open(
                                date_id + "_" + "ID" + randID1 + "-" +
                                randID2 + ".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(paragraph_body1.encode('utf-8'))
                            f.write(paragraph_body2.encode('utf-8'))
                            f.close()
                            return

    finally:
        pass
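
# pre_WHT1 below and pre_WHT3 above hand their output to speech_parser_QC and
# speech_parser_two_para_QC, which are defined elsewhere. Hypothetical
# stand-ins sketching the kind of check they are assumed to perform; the real
# quality checks are not shown in this file:
def speech_parser_QC(url, paragraph_body, release):
    """Assumed helper: flag suspiciously short parses for manual review."""
    if len(paragraph_body.replace(' ', '').replace('\n', '')) < 400:
        print "QC warning: short parsed body for " + url


def speech_parser_two_para_QC(url, paragraph_body1, paragraph_body2):
    """Assumed helper: same check for the two-part parse."""
    combined = paragraph_body1 + paragraph_body2
    if len(combined.replace(' ', '').replace('\n', '')) < 400:
        print "QC warning: short parsed body for " + url
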
def pre_WHT1(url):
    """Prints Text Output for a given URL from Whitehouse Speeches and Remarks"""
    
    import urllib2, sys, random
    import os
    from bs4 import BeautifulSoup
    from dateutil.parser import parse  # parse() below is assumed to be dateutil's

    soup = BeautifulSoup(urllib2.urlopen(url).read(), "html.parser")

    # Get URL
    url2 = "Cite: \n"+url+"\n"


    # Get Date
    Date = soup.find("meta", {"property":"article:published_time"})
    raw_date = str(Date).split('"', 2)[1][0:10]
    date = '\n'+raw_date+'\n'
    date_reform = parse(str(date)).strftime('%B %d, %Y')
    date_reformat = '\n'+date_reform+'\n'


    # Get Release
    Release = soup.find("meta", {"property":"og:title"})
    raw_release = str(Release)
    release = ('\nThe White House\nOffice of the Press Secretary\n\n'
               'For Immediate Release' + date_reformat)

    # Get Title
    Title = soup.find("meta", {"property":"og:title"})
    raw_title = str(Title).split('"', 2)[1]
    title = '\n'+raw_title+'\n'

    
    # Get Paragraph Body
    content = soup.find("div", {"class":"field-items"})
    paragraph = ["".join(x.findAll(text=True)) for x in content.findAll("p")]
    paragraph_body = "\n\n%s" % ("\n\n".join(paragraph))

    speech_parser_QC(url, paragraph_body, release)


    #Get File ID - Date & Time
    #Date - RAW
    date_split = date.split('-')
    month_raw = date_split[1]
    day_raw = date_split[2]
    year_raw = date_split[0]

    #MonthID
    month_clean1 = month_raw.replace(' ', '')
    month_clean2 = month_clean1.replace('\n', '')
    try:
        month_id = month(month_clean2)
    except:
        month_id = month_clean2

    #DayID
    day_clean1 = day_raw.replace(',', '')
    day_clean2 = day_clean1.replace(' ', '')
    day_clean3 = day_clean2.replace('\n', '')
    day_id = day_clean3

    #YearID
    year_clean1 = year_raw.replace(' ', '')
    year_clean2 = year_clean1.replace('\n', '')
    year_id = year_clean2

    #Final DateID
    date_id = year_id+'-'+month_id+'-'+day_id

    #Random ID
    randID1 = str(random.randrange(6, 10000, 1))
    randID2 = str(random.randrange(6, 10000, 1))

    try:
        path1 = date_id+"_"+"ID1"+".txt"
        path2 = date_id+"_"+"ID2"+".txt"
        path3 = date_id+"_"+"ID3"+".txt"
        path4 = date_id+"_"+"ID4"+".txt"
        path5 = date_id+"_"+"ID5"+".txt"
        if os.path.isfile(path1) == False:
            #print "no file ID1 found, create ID1"
            f = open(date_id+"_"+"ID1"+".txt", 'w')
            f.write(url2.encode('utf-8'))
            f.write(release.encode('utf-8'))
            f.write(title.encode('utf-8'))
            f.write(paragraph_body.encode('utf-8'))
            f.close()
            return

        elif os.path.isfile(path1) == True:
            #print "found file ID1, check for ID2"
            if os.path.isfile(path2) == False:
                print "found ID1, no file ID2 found, make ID2"
                f = open(date_id+"_"+"ID2"+".txt", 'w')
                f.write(url2.encode('utf-8'))
                f.write(release.encode('utf-8'))
                f.write(title.encode('utf-8'))
                f.write(paragraph_body.encode('utf-8'))
                f.close()
                return
            elif os.path.isfile(path2) == True:
                #print "found file ID2, check for ID3"
                if os.path.isfile(path3) == False:
                    print "found IDs 1-2, no file ID3 found, make ID3"
                    f = open(date_id+"_"+"ID3"+".txt", 'w')
                    f.write(url2.encode('utf-8'))
                    f.write(release.encode('utf-8'))
                    f.write(title.encode('utf-8'))
                    f.write(paragraph_body.encode('utf-8'))
                    f.close()
                    return
                elif os.path.isfile(path3) == True:
                    #print "found file ID3, check for ID4"
                    if os.path.isfile(path4) == False:
                        print "found IDs 1-3, no file ID4 found, make ID4"
                        f = open(date_id+"_"+"ID4"+".txt", 'w')
                        f.write(url2.encode('utf-8'))
                        f.write(release.encode('utf-8'))
                        f.write(title.encode('utf-8'))
                        f.write(paragraph_body.encode('utf-8'))
                        f.close()
                        return
                    elif os.path.isfile(path4) == True:
                        #print "found file ID4, check for ID5"
                        if os.path.isfile(path5) == False:
                            print "found IDs 1-4, no file ID5 found, make ID5"
                            f = open(date_id+"_"+"ID5"+".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(release.encode('utf-8'))
                            f.write(title.encode('utf-8'))
                            f.write(paragraph_body.encode('utf-8'))
                            f.close()
                            return
                        elif os.path.isfile(path5) == True:
                            print "found IDs 1-5, create random ID"
                            f = open(date_id+"_"+"ID"+randID1+"-"+randID2+".txt", 'w')
                            f.write(url2.encode('utf-8'))
                            f.write(release.encode('utf-8'))
                            f.write(title.encode('utf-8'))
                            f.write(paragraph_body.encode('utf-8'))
                            f.close()
                            return 

        
    finally:
        pass
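
# A short usage sketch, assuming these functions live in one module and the
# URL points at a 'Speeches and Remarks' page (the URL below is illustrative
# only, not taken from the original code):
if __name__ == '__main__':
    example_url = "https://obamawhitehouse.archives.gov/the-press-office/example-remarks"
    pre_WHT1(example_url)  # writes <date_id>_IDn.txt in the working directory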