def get_price_data_from_csv( commodityId ):
    """Load a commodity's stored price history from the master CSV list.

    Reads "price_data/master_list/<commodityId>.csv". The first line of the
    file is taken as the commodity name; every following line is handed to
    DataPoint.from_csv_data to build one datapoint.

    commodityId -- identifier used (via str()) to build the file name.

    Returns a CommodityPriceData( commodityId , name , datapoints ), or None
    when the file cannot be opened or read (IOError).
    """
    filename = "price_data/master_list/" + str( commodityId ) + ".csv"
    try:
        # 'with' closes the handle on every exit path, including when a
        # line fails to parse part-way through the file.
        with open( filename , "r" ) as f:
            # Strip only the line terminator. Slicing off the last character
            # unconditionally would eat a real character of the name when
            # the first line has no trailing "\n".
            name = f.readline().rstrip( "\n" )
            datapoints = [ DataPoint.from_csv_data( line ) for line in f ]
    except IOError:
        return None
    return CommodityPriceData( commodityId , name , datapoints )
Exemple #2
0
    def get_price_data_from_html( objectId ):
        """Scrape an item's name and price/volume history from its Grand Exchange web page.

        objectId -- item identifier; appended (via str()) to the viewitem URL.

        Returns a CommodityPriceData( objectId , name , datapoints ) where name
        comes from the page title and there is one DataPoint per matched pair
        of "average180.push" / "trade180.push" lines in the page.
        Returns None when the site answers with its generic error page for any
        reason other than a temporary IP block.

        When the error page reports a temporary IP block, the request is
        retried every 15 seconds until a different answer comes back.

        Raises AttributeError if the page has no "<title>... - Grand Exchange"
        title to take the name from (re.search returns None in that case).
        """
        url = "http://services.runescape.com/m=itemdb_oldschool/viewitem?obj=" + str( objectId )

        # Retry with a loop instead of recursing: a long-lasting block would
        # otherwise add one stack frame per attempt until the recursion limit.
        while True:
            html = requests.get( url ).text

            if "Sorry, there was a problem with your request." not in html:
                break

            blocked = ( "You've made too many requests recently." in html and
                        "As a result, your IP address has been temporarily blocked. Please try again later." in html )
            if not blocked:
                #invalid object ID
                return None

            # Parenthesised single-argument form prints the same text under
            # Python 2 and also parses under Python 3.
            print( "Computer IP has been blocked. Trying again in 15 seconds..." )
            sleep( 15 )

        #we can find the name in the title of the webpage.
        name = str( re.search( r'(?<=<title>)(.*)(?= - Grand Exchange)' , html ).group( 0 ) )

        #the price data is always pushed to the graphs on the webpage
        #with the command "average180.push( ... )" (and the volume data with
        #"trade180.push( ... )") so we are just interested in those lines.
        priceLines = re.findall( r'average180\.push.*' , html )
        volumeLines = re.findall( r'trade180\.push.*' , html )

        datapoints = []

        # The two lists are paired positionally; the date is read from the
        # price line only.
        # NOTE(review): assumes the n-th price line and the n-th volume line
        # describe the same day -- confirm against the page.
        for priceLine , volumeLine in zip( priceLines , volumeLines ):
            # Index 0 of each list is the "180" inside the command name
            # itself, so the real payload starts at index 1.
            priceNumbers = re.findall( r'\d+' , priceLine )
            volumeNumbers = re.findall( r'\d+' , volumeLine )

            #we keep years, months, and days in string form because
            #we want there to always be 4 digits in a year, 2 digits in
            #a month and 2 digits in a day. If we converted them to integers,
            #we would lose a digit sometimes - e.g. 01 would become 1.
            year = str( priceNumbers[ 1 ] )
            month = str( priceNumbers[ 2 ] )
            day = str( priceNumbers[ 3 ] )

            #prices, on the other hand, can be converted to integers because
            #all commodities will always cost an integer number of coins.
            dailyPrice = int( priceNumbers[ 4 ] )
            average = int( priceNumbers[ 5 ] )

            #volume can also be converted to an integer
            #because all commodities will have an integer volume each day
            tradeVolume = int( volumeNumbers[ 4 ] )

            datapoints.append( DataPoint( year , month , day , dailyPrice , average , tradeVolume ) )

        return CommodityPriceData( objectId , name , datapoints )
 def read_month_data( month , year , commodity ):
     """Load one month of stored price data for a commodity.

     Reads "price_data/<year> <DateUtils.format_month( month )>/<commodity>.csv".
     Each line is parsed with DataPoint.from_csv_month_data( year , month , line )
     and stored in the result under its integer day (datapoint.get_day()).

     month , year -- passed to MonthData, DateUtils.format_month and the parser.
     commodity    -- file name stem; must be a string (it is concatenated as-is).

     Returns the MonthData for that month. If the file cannot be opened or
     read (IOError) the MonthData is returned as far as it was filled --
     normally just as constructed.
     """
     rtn = MonthData( month , year )
     directory = "price_data/" + str( year ) + " " + DateUtils.format_month( month )
     try:
         # 'with' closes the file on every exit path; iterating the handle
         # avoids building an intermediate list of lines.
         with open( directory + "/" + commodity + ".csv" , "r" ) as csv_file:
             for line in csv_file:
                 datapoint = DataPoint.from_csv_month_data( year , month , line )
                 rtn.set( int( datapoint.get_day() ) , datapoint )
     except IOError:
         #there is no data, so ignore the error and return default
         #values of 0 for daily and average prices
         pass

     return rtn
Exemple #4
0
    def get_price_data_from_json( name , objectId ):
        """Fetch an item's price history from the Grand Exchange graph JSON endpoint.

        name     -- item name, passed through unchanged to CommodityPriceData.
        objectId -- item identifier; appended (via str()) to the graph URL.

        Returns a CommodityPriceData( objectId , name , datapoints ) with one
        DataPoint( year , month , day , price , average ) per timestamp, or
        None when the response contains "404 - Page not found".
        """
        url = "http://services.runescape.com/m=itemdb_oldschool/api/graph/" + str(objectId) + ".json"
        body = requests.get( url ).text

        #bad object ID
        if "404 - Page not found" in body:
            return None

        #everything before the word "average" is the daily price data,
        #everything from that word onwards is the average price data.
        split_at = body.find( "average" )

        #both halves have the form {timestamp:value, timestamp:value, ...}
        #with integer timestamps and prices, so pulling out every run of
        #digits yields a flat timestamp, value, timestamp, value, ... list.
        #the two halves are expected to have the same length, which lets
        #one index address both lists.
        daily_numbers = re.findall( r'\d+' , body[:split_at] )
        average_numbers = re.findall( r'\d+' , body[split_at:] )

        datapoints = []
        for pair_index , timestamp in enumerate( daily_numbers[::2] ):
            #the value sits directly after its timestamp in the flat list
            value_index = 2 * pair_index + 1

            #add an extra 12 hours (43200 s) because Jagex is several hours
            #ahead; only the date matters, not the hour of day.
            moment = datetime.datetime.fromtimestamp( int(timestamp)/1000 + 43200 )

            #zero-padded "YYYY-MM-DD" split into its three string parts
            year , month , day = moment.date().isoformat().split( "-" )

            datapoints.append( DataPoint( year , month , day ,
                                          int( daily_numbers[ value_index ] ) ,
                                          int( average_numbers[ value_index ] ) ) )

        return CommodityPriceData( objectId , name , datapoints )