def get_cast_crew(self,url):
        """
        Fetch a cast-and-crew page and collect its credit tables.

        Every ``<div id="cast">`` on the page is treated as one section.  The
        text of the section's ``<h1>`` is used as the key, and the section's
        table rows (three cells each: position, filler, name) are grouped
        into ``{position: [name, ...]}`` and serialised with ``json.dumps``.

        Only the first two sections are read, matching the two expected
        headings below.

        Returns a dict that always contains the keys ``u'Cast'`` and
        ``u'Production and Technical Credits'``.  A section that is missing
        or cannot be parsed maps to ``np.nan``.
        """
        request=get_file(url)
        soup = BeautifulSoup(request.text)

        expected=[u'Cast',u'Production and Technical Credits']

        # Seed every expected key once, up front.  Seeding inside the loop
        # could overwrite a section that had already been stored under the
        # heading found on the page.
        main_dic=dict.fromkeys(expected, np.nan)

        # Query the document once instead of twice per section.
        sections=soup.findAll('div',{'id':'cast'})

        for i, title in enumerate(expected):
            try:
                section=sections[i]
                # Prefer the heading printed on the page as the key.
                title=section.find('h1').text
                dic={}
                for row in section.findAll('tr'):
                    # Exactly three cells are required; anything else raises
                    # and marks the whole section as unavailable.
                    position, _, name = row.findAll('td')
                    position= unicodedata.normalize('NFKD', position.text).encode('ascii','ignore')
                    name = unicodedata.normalize('NFKD', name.text).encode('ascii','ignore')
                    dic.setdefault(position, []).append(name)
                main_dic[title]=json.dumps(dic)
            except Exception:
                # Best effort: record the section as missing, but never
                # discard data already stored under the same key.
                main_dic.setdefault(title, np.nan)
        return main_dic
 def movie_list_to_df(self, movie_list):
     """
     Process each movie in ``movie_list`` and append its data to ``self._df``.

     Every entry is appended to ``self._base_link`` to form the URL of the
     movie's summary page.  The cast-and-crew URL is that same URL with
     'summary' replaced by 'cast-and-crew'.
     """
     for movie_path in movie_list:
         summary_url = self._base_link + movie_path
         summary_page = get_file(summary_url)

         # Mapping of future dataframe column -> value for this movie.
         record = self.scrape_movie_data_to_dic(summary_page)

         credits_url = summary_url.replace('summary', 'cast-and-crew')
         credits = self.get_cast_crew(credits_url)

         # Merge the credits into the movie's record.
         record['Cast'] = credits[u'Cast']
         record['Crew'] = credits[u'Production and Technical Credits']

         # Build a dataframe for this movie and add it to the running total.
         movie_frame = pd.DataFrame.from_dict(record)
         self._df = self._df.append(movie_frame)
 def get_year_page_movie_list(self,year):
     """
     Return the links of all movies from ``year`` that have a budget listed.

     Fetches ``self._base_link + '/movies/year/' + str(year)`` and walks the
     rows of the first table on that page.  A row counts as having a budget
     when it holds a ``<td class="data">`` cell whose text is longer than
     one character.  For each such row the ``href`` of the row's first link
     is collected.  Rows without a link, or whose link has no ``href``, are
     skipped.

     Raises IndexError when the page contains no table.
     """

     ##get the webpage with all the movies from that year
     request=get_file(self._base_link+'/movies/year/'+str(year))
     soup = BeautifulSoup(request.text)
     output=[]
     for item in soup.findAll('table')[0].findAll('tr'):

         ##only add movies to list when they have budget listed
         budget=item.find("td", { "class" : "data" })
         if budget is None or len(budget.text)<=1:
             continue

         # Read the href attribute directly.  Splitting the serialised tag
         # on '"' returns whichever attribute happens to come first, and
         # yields HTML-escaped text.
         anchor=item.find('a')
         if anchor is None:
             continue
         href=anchor.get('href')
         if href is not None:
             output.append(href)

     return output