Example #1
def downloadForecast():
    #download
    start_date = datetime.date.today()
    end_date = start_date + datetime.timedelta(days=FORECAST_RANGE)
    f = '00'
    forecast_types = {
        FORECAST_FLX_FOLDER:
        'http://nomads.ncep.noaa.gov/cgi-bin/filter_cfs_flx.pl?file=flxf{f_year}{f_month:02}{f_day:02}{time}.01.{year}{month:02}{day:02}{f}.grb2&lev_2_m_above_ground=on&lev_surface=on&var_PRATE=on&var_TMAX=on&var_TMIN=on&var_TMP=on&subregion=&leftlon=-76&rightlon=-52&toplat=-19&bottomlat=-56&dir=%2Fcfs.{year}{month:02}{day:02}%2F{f}%2F6hrly_grib_01',
        FORECAST_PGB_FOLDER:
        'http://nomads.ncep.noaa.gov/cgi-bin/filter_cfs_pgb.pl?file=pgbf{f_year}{f_month:02}{f_day:02}{time}.01.{year}{month:02}{day:02}{f}.grb2&lev_2_m_above_ground=on&lev_surface=on&var_APCP=on&var_RH=on&subregion=&leftlon=-76&rightlon=-52&toplat=-19&bottomlat=-56&dir=%2Fcfs.{year}{month:02}{day:02}%2F{f}%2F6hrly_grib_01'
    }
    for key in forecast_types.keys():
        for a_date in daterange(start_date, end_date):
            for a_time in ['00', '06', '12', '18']:
                url = forecast_types[key].format(year=start_date.year,
                                                 month=start_date.month,
                                                 day=start_date.day,
                                                 f_year=a_date.year,
                                                 f_month=a_date.month,
                                                 f_day=a_date.day,
                                                 time=a_time,
                                                 f=f)
                folder = key
                filename = getFilenameForGDAS(a_date, a_time, f=f)
                with open(folder + '/' + filename, 'wb') as out:
                    out.write(urllib.request.urlopen(url).read())
                time.sleep(SLEEP)
Example #2
def dailymessagesperuser(channelid: str, userid: str) -> typing.Tuple[str, str, dict]:
    """Returns the Discord channel name, the user name, and a dictionary mapping each day between the first and last logged message to that user's message count"""
    data = decompressdata(channelid)

    username = None
    daily = {}

    # Init daily message count dictionary with all days between first and last message
    start_dt = datetime.date.fromisoformat(data["messages"][0]["timestamp"].split("T")[0])
    end_dt = datetime.date.fromisoformat(data["messages"][-1]["timestamp"].split("T")[0])
    for dt in daterange(start_dt, end_dt):
        daily[dt.strftime("%Y-%m-%d")] = 0

    for message in data["messages"]:
        user = message["author"]["id"]
        if user != userid:
            continue

        date = message["timestamp"].split("T")[0]
        username = f'{message["author"]["name"]}#{message["author"]["discriminator"]}'
        daily[date] += 1

    # Drop leading zero-count days from before the user's first message
    for dt in daterange(start_dt, end_dt):
        if not daily[dt.strftime("%Y-%m-%d")]:
            del daily[dt.strftime("%Y-%m-%d")]
        else:
            break

    return (data['channel']['name'], username, daily)
Example #3
def scrape(origin, destination, start_date, end_date):
    # Creates a file with the cheapest outgoing and return flights for a given
    # date range.
    items = int((end_date - start_date).days) - 1
    date_list = daterange(start_date, end_date)

    for index, (depart_date, return_date) in enumerate(pairwise(date_list)):
        try:
            # params is assumed to be defined at module level (search options)
            depart_list, return_list = search_flight(destination, depart_date,
                                                     return_date, params)
        except Exception:
            log.error(f"Some error occurred, skipping item {index+1}/{items}")
            continue  # without this, depart_list/return_list would be undefined
        # assumes search_flight returns iterables of prices
        min_depart = min(depart_list)
        min_return = min(return_list)
        data = open_file(f'{destination}.json')
        data.setdefault(str(depart_date), {})['outgoing'] = min_depart
        data.setdefault(str(return_date), {})['return'] = min_return
        with open(f'{destination}.json', 'w') as f:
            json.dump(data,
                      f,
                      sort_keys=True,
                      indent=8,
                      separators=(',', ': '))
        log.info(f"Search {index+1}/{items} complete: {depart_date} to "
                 f"{return_date}. ({min_depart}MYR, {min_return}MYR)")
Example #4
def test_daterange():
    start_date = date(2018, 5, 30)
    end_date = date(2018, 6, 1)
    interval = daterange(start_date, end_date)
    assert date(2018, 5, 30) == next(interval)
    assert date(2018, 5, 31) == next(interval)
    assert date(2018, 6, 1) == next(interval)
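
None of these examples ship daterange itself, but the test above pins down the contract most of them assume: a generator yielding every date from start_date to end_date inclusive. Here is a minimal sketch consistent with that test, including the reverse=True keyword that Examples #20 and #26 pass (its exact semantics in those projects are an assumption):

import datetime

def daterange(start_date, end_date, reverse=False):
    # yield every date from start_date to end_date inclusive,
    # matching the assertions in test_daterange above
    days = (end_date - start_date).days
    steps = range(days, -1, -1) if reverse else range(days + 1)
    for n in steps:
        yield start_date + datetime.timedelta(days=n)

Note that a few snippets (for instance the year-chaining in Examples #8 and #16) read more naturally with an end-exclusive variant, so boundary behaviour differs between the projects these examples were collected from.
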
Example #5
def scrape_games():
    parser = utils.incremental_date_range_cmd_line_parser()
    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')

    args = parser.parse_args()
    last_month = ''

    for cur_date in utils.daterange(datetime.date(2010, 10, 15), 
                                    datetime.date.today()):
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        if not utils.includes_day(args, str_date):
            if DEBUG:
                print('skipping', str_date, 'because not in cmd line arg daterange')
            continue
        mon = time.strftime("%b%y", cur_date.timetuple())
        if mon != last_month:
            print()
            print(mon, cur_date.day * "  ", end=' ')
            sys.stdout.flush()
            last_month = mon
        ret = scrape_date(str_date, cur_date, passive=args.passive)
        if ret == DOWNLOADED:
            print('o', end=' ')
        elif ret == REPACKAGED:
            print('O', end=' ')
        elif ret == ERROR:
            print('!', end=' ')
        elif ret == MISSING:
            print('_', end=' ')
        else:
            print('.', end=' ')
        sys.stdout.flush()
    print()
    os.chdir('../..')
Example #6
def downloadDataFromIMERG(start_date, end_date, folder):
    config_parser = ConfigParser()
    config_parser.read('resources/passwords.cfg')
    passman = urllib.request.HTTPPasswordMgrWithDefaultRealm()
    passman.add_password(None, 'https://urs.earthdata.nasa.gov',
                         config_parser.get('IMERG', 'username'),
                         config_parser.get('IMERG', 'password'))
    opener = urllib.request.build_opener(
        urllib.request.HTTPBasicAuthHandler(passman),
        urllib.request.HTTPCookieProcessor(http.cookiejar.CookieJar()))
    urllib.request.install_opener(opener)
    for a_date in daterange(start_date, end_date):
        filename = getFilenameForIMERG(a_date)
        if (path.isfile(folder + '/' + filename)):
            continue  #TODO: Also check filesize
        url = 'https://gpm1.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGDL.{version}/{year}/{month:02}/{filename}'.format(
            year=a_date.year,
            month=a_date.month,
            filename=filename,
            version=getIMERGVersion(a_date))
        if (getIMERGVersion(a_date) == '05'):
            url = url.replace(
                'data', 'opendap'
            ) + '.nc4?precipitationCal[1040:1280][339:709],precipitationCal_cnt[1040:1280][339:709],lon[1040:1280],lat[339:709]'
        request = urllib.request.Request(url)
        response = urllib.request.urlopen(request)
        with open(folder + '/' + filename, 'wb') as handle:
            handle.write(response.read())
        time.sleep(SLEEP)
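
Example #6 reads NASA Earthdata credentials through ConfigParser; the get('IMERG', 'username') and get('IMERG', 'password') calls imply a resources/passwords.cfg along these lines (values are placeholders):

[IMERG]
username = your_earthdata_username
password = your_earthdata_password
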
Example #7
def dailymessages(channelid: str) -> typing.Tuple[str, list, dict]:
    """Returns the Discord channel name, a list of all user IDs who have sent a message, and a dictionary mapping each day between the first and last logged message to its message count"""
    data = decompressdata(channelid)

    users = []
    daily = {}

    # Init daily message count dictionary with all days between first and last message
    start_dt = datetime.date.fromisoformat(
        data["messages"][0]["timestamp"].split("T")[0])
    end_dt = datetime.date.fromisoformat(
        data["messages"][-1]["timestamp"].split("T")[0])
    for dt in daterange(start_dt, end_dt):
        daily[dt.strftime("%Y-%m-%d")] = 0

    for message in data["messages"]:
        date = message["timestamp"].split("T")[0]
        user = message["author"]["id"]

        if user not in users:
            users.append(user)

        daily[date] += 1

    return (data['channel']['name'], users, daily)
Example #8
def download(years, datasource, dest, skip_existing, datatype, filefmt):
    """
    Create date range and downloads file for each date.

    Args:
        years (iterable): years for which to download data
        datasource (str): source ('merra' or 'merra2')
        dest (str): path to destination directory
        skip_existing (bool): skip download if target exists
        datatype (str): choose 'wind' or 'solar' data for presets
        filefmt (str): file format to download
    """
    try:
        options = PRESETS[datasource]
    except KeyError as e:
        raise ArgumentError("Unknown datasource '{}'".format(datasource))

    if datatype not in options['datatypes']:
        raise ArgumentError("Unknown datatype '{}' for source '{}'".format(datatype,datasource))
    if filefmt not in options['fileformats']:
        raise ArgumentError("Unknown file format '{}' for source '{}'".format(filefmt,datasource))
    

    dates = chain(*[utils.daterange(start_date=datetime.date(year,1,1),
                                    end_date=datetime.date(year+1,1,1)) for year in years])
    pool = utils.ThreadPool(4)

    def dl_task(date):
        download_date(date,dest,skip_existing,settings=options,
            datatype=datatype,filefmt=filefmt)

    for date in dates:
        pool.add_task(dl_task,date)

    pool.wait_completion()
Example #9
    def history(self, start, end):
        history = []
        for date in daterange(start, end):
            day_balance = {}
            day_balance['day'] = date.strftime('%Y-%m-%d')
            day_balance['balance'] = self.balance_in_minutes(
                date.strftime('%Y-%m-%d'))
            history.append(day_balance)
        return history
Example #10
    def get_blank_calendar_object(self, start_and_end):
        start = utils.get_date_from_gtfs_date(start_and_end['start_date'])
        end = utils.get_date_from_gtfs_date(start_and_end['end_date'])

        calendarObject = {}
        for d in utils.daterange(start, end):
            d = utils.get_gtfs_date_from_date(d)
            calendarObject[d] = None

        return calendarObject
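
Example #10 leans on two GTFS date converters that are not shown. Since GTFS calendars encode dates as 'YYYYMMDD' strings, minimal sketches compatible with this usage (assumptions, not the original project's code) could be:

import datetime

def get_date_from_gtfs_date(gtfs_date):
    # GTFS encodes service dates as 'YYYYMMDD' strings
    return datetime.datetime.strptime(gtfs_date, '%Y%m%d').date()

def get_gtfs_date_from_date(d):
    return d.strftime('%Y%m%d')
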
Example #11
def get_chart(data):
    if (data["type"] == "subjectinplaces"):

        if "all_locations" in data:
            all_locations = True

        subject = data["subject"].strip()
        if (subject == ""):
            return "{'error':'Empty subject'}"

        subject_regex = simpleregex.create(subject)
        now = datetime.now().date()
        locations = load_locations()

        return json.dumps(
            get_subject_relevance_in_places(now, subject_regex, locations,
                                            data["ctype"] == "r"))
    # End subjectinplaces

    elif (data["type"] == "subjectinplaceshistory"):
        subject = data["subject"].strip()
        if (subject == ""):
            return "{'error':'Empty subject'}"

        start_date = datetime.strptime(data["starts"], "%Y-%m-%d").date()
        end_date = datetime.strptime(data["ends"], "%Y-%m-%d").date()

        sr = simpleregex.create(subject)
        locations = load_locations()

        all_data = {}

        for current in utils.daterange(start_date,
                                       end_date + timedelta(days=1)):
            current_str = str(current.strftime("%d-%m-%Y"))
            all_data[current_str] = get_subject_relevance_in_places(
                current, sr, locations, data["ctype"] == "r")

        return json.dumps(all_data)

    # End subjectinplaceshistory

    elif (data["type"] == "subjectsinplace"):
        date = datetime.strptime(data["date"], "%Y-%m-%d").date()
        history = date != datetime.now().date()

        data = get_trends_in_place(date, data["location"], history=history)

        return json.dumps(
            data[0]["trends"]) if data != "" else '{"error": "Data not found"}'

    # End subjectsinplace

    return '[]'
Example #12
def downloadDataFromGDAS(start_date, end_date, folder):
    for a_date in daterange(start_date, end_date):
        for a_time in ['00', '06', '12', '18']:
            for a_forecast in ['00', '03', '06', '09']:  # forecast hour
                url = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_gdas_0p25.pl?file=gdas.t{time}z.pgrb2.0p25.f0{forecast}&lev_2_m_above_ground=on&var_GUST=on&var_RH=on&var_TCDC=on&var_TMAX=on&var_TMIN=on&var_TMP=on&subregion=&leftlon=-76&rightlon=-52&toplat=-19&bottomlat=-56&dir=%2Fgdas.{year}{month:02}{day:02}%2F{time}'.format(
                    time=a_time,
                    forecast=a_forecast,
                    year=a_date.year,
                    month=a_date.month,
                    day=a_date.day)
                filename = getFilenameForGDAS(a_date, a_time, f=a_forecast)
                with open(folder + '/' + filename, 'wb') as out:
                    out.write(urllib.request.urlopen(url).read())
                time.sleep(SLEEP)
Example #13
def graph(request):
    start = datetime.date(2013, 1, 27)
    to = datetime.timedelta(days=100)
    end = start + to

    if not cache.get('ar'):

        ar = {}

        feeds = Feed.objects.all()

        for feed in feeds:
            user_ar = {}

            r = daterange(start, to=end)
            for a in r:
                count = Post.objects.filter(dt_published__year=a.year, dt_published__month=a.month, dt_published__day=a.day, feed=feed).count()

                if count > 20:
                    class_ = '_4'
                elif count > 15:
                    class_ = '_3'
                elif count > 10:
                    class_ = '_2'
                elif count > 0:
                    class_ = '_1'
                else:
                    class_ = '_0'

                user_ar[a] = class_

            od = collections.OrderedDict(sorted(user_ar.items()))
            ar[feed] = od

        ar = collections.OrderedDict(sorted(ar.items()))

        cache.set('ar', ar, 60*60)

    else:
        ar = cache.get('ar')

    ctx = {
#        'post_list': Post.objects.all(),
        'feed_count': [x.id for x in Feed.objects.all()],
        'table': ar,
        'today': datetime.datetime.today(),
        'show_all': True
    }

    return ctx
Example #14
def combine_price_and_sentiment(price_data, sentiment_data, date_start,
                                date_end):
    p_and_s = {}
    for company in sentiment_data.keys():
        company_ps = {}
        for date in daterange(date_start, date_end):
            date_ps = np.zeros(7)
            # Currently inefficient, if this becomes bottleneck then fix
            for date_list in price_data[company]:
                if date_list[0] == date:
                    date_ps[:6] = date_list[1:]
            if date in sentiment_data[company]:
                date_ps[6] = sentiment_data[company][date]
            if not (date_ps == np.zeros(7)).all():
                company_ps[date] = date_ps
        p_and_s[company] = company_ps
    return p_and_s
Example #15
def _get_event_table(dimension, year, quarter=0):  # a None quarter would break the indexing below
    quarter_dates = ['12-31', '03-31', '06-30', '09-30']
    start_year = year
    if quarter == 0:
        start_year -= 1
    start_dt = datetime.datetime.strptime(
        str(start_year) + '-' + quarter_dates[quarter], "%Y-%m-%d")
    end_dt = datetime.datetime.strptime(
        str(year) + '-' + quarter_dates[(quarter + 1) % 4], "%Y-%m-%d")
    event_table = quandl.get_table('SHARADAR/EVENTS',
                                   date=list(daterange(start_dt, end_dt)),
                                   paginate=True)
    price_table = quandl.get_table('SHARADAR/SF1',
                                   dimension=dimension,
                                   calendardate=[start_dt, end_dt],
                                   paginate=True)
    return event_table, price_table
Example #16
def download(years, datasource, dest, skip_existing, datatype, filefmt):
    """
    Create date range and downloads file for each date.

    Args:
        years (iterable): years for which to download data
        datasource (str): source ('merra' or 'merra2')
        dest (str): path to destination directory
        skip_existing (bool): skip download if target exists
        datatype (str): choose 'wind' or 'solar' data for presets
        filefmt (str): file format to download
    """
    try:
        options = PRESETS[datasource]
    except KeyError as e:
        raise ArgumentError("Unknown datasource '{}'".format(datasource))

    if datatype not in options['datatypes']:
        raise ArgumentError("Unknown datatype '{}' for source '{}'".format(
            datatype, datasource))
    if filefmt not in options['fileformats']:
        raise ArgumentError("Unknown file format '{}' for source '{}'".format(
            filefmt, datasource))

    dates = chain(*[
        utils.daterange(start_date=datetime.date(year, 1, 1),
                        end_date=datetime.date(year + 1, 1, 1))
        for year in years
    ])
    pool = utils.ThreadPool(4)

    def dl_task(date):
        download_date(date,
                      dest,
                      skip_existing,
                      settings=options,
                      datatype=datatype,
                      filefmt=filefmt)

    for date in dates:
        pool.add_task(dl_task, date)

    pool.wait_completion()
Example #17
    def load_stock(self, start_date, end_date):
        combination_setting = strategy.CombinationSetting()
        strategy_creator = CombinationStrategy(combination_setting)
        codes = []
        for date in utils.daterange(utils.to_datetime(start_date),
                                    utils.to_datetime(end_date)):
            codes = list(
                set(codes + strategy_creator.subject(utils.to_format(date))))
        data = None
        while data is None or len(data) <= self.state_size:
            self.code = numpy.random.choice(codes)
            data = Loader.load(
                self.code,
                utils.to_format(
                    utils.to_datetime(start_date) - relativedelta(months=12)),
                end_date)
            data = utils.add_stats(data)
            data = Loader.filter(data, start_date, end_date)
        print("code: [%s]" % self.code)
        return data
Example #18
    def get(self):
        data = Analytics.parser.parse_args()
        likes = db.session.query(
            LikeModel.date_created,
            db.func.count(LikeModel.date_created)).filter(
                LikeModel.date_created > data['date_from'],
                LikeModel.date_created < data['date_to']).group_by(
                    LikeModel.date_created).all()
        likes_dict = {date.date(): likes_count for date, likes_count in likes}

        result_stat = [{
            "date": date.strftime('%Y-%m-%d'),
            "likes": likes_dict.get(date, 0)
        } for date in daterange(
            datetime.datetime.strptime(data['date_from'], "%Y-%m-%d").date(),
            datetime.datetime.strptime(data['date_to'], "%Y-%m-%d").date())]

        return result_stat, 200
Example #19
def extractForecastData(lat, lon, out_filename):
    output = 'Date,Minimum Temp (C),Mean Temperature (C),Maximum Temp (C),Rain (mm),Relative Humidity %,CloudCover,Mean Wind SpeedKm/h' + '\n'
    today = datetime.date.today()
    for a_date in daterange(
            today, today + datetime.timedelta(hours=FORECAST_RANGE * 24)):
        FIELDS = ['Relative humidity']
        fields_values = extractDailyDataFromGDAS(
            lat,
            lon + 360.,
            a_date,
            FORECAST_PGB_FOLDER,
            FIELDS,
            typeOfLevel='heightAboveGround',
            f='00')
        mean_rh = (np.min(fields_values[FIELDS[0]]) +
                   np.max(fields_values[FIELDS[0]])) / 2.

        FIELDS = [
            'Minimum temperature', 'Maximum temperature', 'Precipitation rate'
        ]
        fields_values = extractDailyDataFromGDAS(
            lat,
            lon + 360.,
            a_date,
            FORECAST_FLX_FOLDER,
            FIELDS,
            typeOfLevel=['heightAboveGround', 'surface'],
            f='00')
        min_T, max_T = np.min(fields_values[FIELDS[0]]), np.max(
            fields_values[FIELDS[1]])
        mean_T = (min_T + max_T) / 2.
        precipitation = np.sum(
            np.array(fields_values[FIELDS[2]]) * 60 * 60 * 6)
        output += a_date.strftime('%Y-%m-%d') + ', ' + ', '.join([
            str(min_T),
            str(mean_T),
            str(max_T),
            str(precipitation),
            str(mean_rh)
        ]) + ',,' + '\n'
    with open(out_filename.replace('.csv', '.forecast.csv'), 'w') as out:
        out.write(output)
Example #20
def scrape_games():
    parser = utils.incremental_date_range_cmd_line_parser()
    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')

    args = parser.parse_args()
    last_month = ''
    
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    #Goko updates logs in real time; wait a day so the list is finalized.

    for cur_date in utils.daterange(default_startdate, yesterday, reverse=True):
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        if not utils.includes_day(args, str_date):
            if DEBUG:
                print('skipping', str_date, 'because not in cmd line arg daterange')
            continue
        mon = time.strftime("%b%y", cur_date.timetuple())
        if mon != last_month:
            print()
            print(mon, cur_date.day * "  ", end=' ')
            sys.stdout.flush()
            last_month = mon
        ret = scrape_date(str_date, cur_date, passive=args.passive)
        if ret == DOWNLOADED:
            print('o', end=' ')
        elif ret == REPACKAGED:
            print('O', end=' ')
        elif ret == ERROR:
            print('!', end=' ')
        elif ret == MISSING:
            print('_', end=' ')
        else:
            print('.', end=' ')
        sys.stdout.flush()
    print()
    os.chdir('../..')
Example #21
def extractHistoricData(lat, lon, start_date, end_date, out_filename):
    output = ''
    if (not os.path.isfile(out_filename)):
        output = 'Date,Minimum Temp (C),Mean Temperature (C),Maximum Temp (C),Rain (mm),Relative Humidity %,CloudCover,Mean Wind SpeedKm/h' + '\n'
        first_date, last_date = getStartEndDates(DATA_FOLDER + 'cordoba.csv')
        # in case this is a new city, we start from the very beginning
        start_date = min(start_date, first_date)
    for a_date in daterange(start_date, end_date):
        FIELDS = [
            'Minimum temperature', 'Maximum temperature', 'Relative humidity'
        ]
        # To validate that the +360 was ok: 1) gdal_translate a grib to a tif and open QGIS with Google Maps as background; 2) use https://www.latlong.net/Show-Latitude-Longitude.html; 3) explore.py
        fields_values = extractDailyDataFromGDAS(
            lat,
            lon + 360.,
            a_date,
            GDAS_FOLDER,
            FIELDS,
            typeOfLevel='heightAboveGround',
            f='03')
        min_T, max_T = np.min(fields_values[FIELDS[0]]), np.max(
            fields_values[FIELDS[1]])
        mean_T = (min_T + max_T) / 2.
        mean_rh = (np.min(fields_values[FIELDS[2]]) +
                   np.max(fields_values[FIELDS[2]])) / 2.

        precipitation = extractDailyDataFromIMERG(lat, lon, a_date)
        output += a_date.strftime('%Y-%m-%d') + ', ' + ', '.join([
            str(min_T),
            str(mean_T),
            str(max_T),
            str(precipitation),
            str(mean_rh)
        ]) + ',,' + '\n'
    with open(out_filename, 'a') as out:
        out.write(output)
Example #22
	def chart_tweetsperday(self):
		self.c.execute('SELECT MIN(timestamp), MAX(timestamp) FROM tweets')
		firstt, lastt = self.c.fetchone()
		x = []
		days = []
		day = {}
		month = 0
		months = []
		x_months = []
		
		self.c.execute('SELECT timestamp FROM tweets WHERE timestamp > 0')
		for row in self.c:
			t = time.localtime(float(row[0]))
			d = date.fromtimestamp(time.mktime(t)).isoformat()
			if d in day:
				day[d] += 1
			else:
				day[d] = 1

		for single_date in daterange(date.fromtimestamp(firstt), date.fromtimestamp(lastt)):
			days.append(single_date)
			d = single_date.isoformat()
			if d[0:7] != month:
				month = d[0:7]
				months.append(single_date)
				x_months.append([0,0])
				
			if d in day:
				x.append(day[d])
				x_months[len(x_months)-1][0] += day[d]
			else:
				x.append(0)
			x_months[len(x_months)-1][1] += 1
		
		days = md.date2num(days)
		months = md.date2num(months)
		x_months = [float(_x[0])/float(_x[1]) for _x in x_months]
		
		plt.clf()
		ax = plt.subplot(111)
		plt.plot(days, x)
		plt.title(_("Tweets per day"))
		ax.set_ylabel(_("count"))
		ax.xaxis_date()
		plt.axis([min(days), max(days), 0, max(x)+5])
		ax.xaxis.set_major_formatter( md.DateFormatter('%m/%Y') )
		labels = ax.get_xticklabels()
		plt.setp(labels, rotation=30, fontsize=10)
		plt.savefig(self.FILEPREFIX+"perday.png")
		self.charts.append(self.FILEPREFIX+"perday.png")
		
		plt.clf()
		ax = plt.subplot(111)
		plt.plot(months, x_months)
		plt.title(_("Tweets per day (averaged by month)"))
		ax.set_ylabel(_("count"))
		ax.xaxis_date()
		plt.axis([min(months), max(months), 0, max(x_months)+5])
		ax.xaxis.set_major_formatter( md.DateFormatter('%m/%Y') )
		labels = ax.get_xticklabels()
		plt.setp(labels, rotation=30, fontsize=10)
		plt.savefig(self.FILEPREFIX+"perday.avg.png")
		self.charts.append(self.FILEPREFIX+"perday.avg.png")
		
		plt.clf()
		ax = plt.subplot(111)
		plt.hist(x, bins=30, color='b')
		plt.title(_("Tweets per day (distribution)"))
		ax.set_xlabel(_("count"))
		ax.set_ylabel(_("days"))
		plt.savefig(self.FILEPREFIX+"perday.hist.png")
		self.charts.append(self.FILEPREFIX+"perday.hist.png")
Example #23
def runSimulation(account, dateRange, startingDeposit, selector, sampleSize=False, customTickerList=False, preloadToMemory=False, depositAmount=False, depositFrequency=False, comission=10, PrintToTerminal=True):
    '''
    Runs a single simulation. Saves results to a csv file.

    -dateRange must be a 2-element list [<start date>, <end date>], with dates as "YYYY-MM-DD" strings.
    -depositFrequency is how often (in days) to deposit funds into your trading account.
    -selector is a StockSelectionInterface object.
    -Passing a customTickerList will run the simulation using only the tickers included in the list.
    '''
    #Check for valid parameters
    if ((depositFrequency) and (not depositAmount)):
        raise ValueError("Deposit frequency set without deposit amount.")
    if ((depositAmount) and (not depositFrequency)):
        raise ValueError("Deposit amount set without deposit frequency.")

    #Instantiate objects
    if (PrintToTerminal):
        print("\nGetting tickers...")

    if (customTickerList):
        tickerList = customTickerList
    elif (sampleSize):
        tickerList = database.getTickerList(randomize=True, numberOfShuffles=2)[:sampleSize]
    else:
        tickerList = database.getTickerList()

    if (preloadToMemory):
        print("Preloading stock data to memory...")
        database.loadDatabaseToMemory(tickerList)

    #Set starting balance and comission
    account.depositFunds(startingDeposit)
    account.setCommision(comission)

    #Extract daterange
    startDate = dateRange[0]
    endDate = dateRange[1]

    #Progress bar header
    if (PrintToTerminal):
        print ("\nRuning Simulation...\n")
        print ("Selector: " + selector.getName())  #NOTE Don't forget to set your self.name property in you selector constructor
        print ("Daterange: "+startDate+" to "+endDate)
        print ("-------------------------------------------\n")
        sys.stdout.write("\r")
        sys.stdout.write("0.0%")
        sys.stdout.flush()

    daysSinceLastDeposit = 0

    #Begin simulation
    for date in utils.daterange(startDate, endDate):
        #Check if market is open
        if (utils.isWeekday(date)):
            #Selects which stocks to sell
            ownedStocks = account.getOwnedStocks()
            ownedTickers = []
            for ticker in ownedStocks:
                ownedTickers.append(ticker)

            stocksToSell = selector.selectStocksToSell(ownedTickers, date=date)
            #Sells stocks
            account.placeSellOrders(stocksToSell, date)

            #Selects which stocks to buy
            availableFunds = account.getBalance()
            numberOfStocksToBuy = selector.numberOfStocksToBuy(availableFunds)

            stocksToBuy = selector.selectStocksToBuy(numberOfStocksToBuy, date=date, customTickerList=tickerList)
            
            buyOrders = []

            for stock in stocksToBuy:
                ticker = stock[0]
                price = database.getDataframe(ticker, [date,date], ["Open"]).loc[date, "Open"]
                quantity = int((stock[1]*(availableFunds-(len(stocksToBuy)*comission))) / price)
                if quantity>0:
                    buyOrders.append([ticker, quantity])

            #Buys stocks
            account.placeBuyOrders(buyOrders, date)

        if (depositFrequency):
            daysSinceLastDeposit += 1
            if (daysSinceLastDeposit == depositFrequency):
                account.depositFunds(depositAmount)
                daysSinceLastDeposit = 0

        #Progress bar
        if (PrintToTerminal):
            completed = utils.getDayDifference(startDate, date)
            totalToDo = utils.getDayDifference(startDate, endDate)
            percentage = int(float(completed*1000)/(totalToDo-1))/10.0
            sys.stdout.write("\r")
            sys.stdout.write(str(percentage)+"%")
            sys.stdout.flush()

    if (PrintToTerminal):
        print("\n")
    #Save logs
    account.saveHistory(selector.getName())
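
Unlike the earlier examples, runSimulation passes "YYYY-MM-DD" strings straight into utils.daterange, utils.isWeekday and utils.getDayDifference. A sketch of string-based helpers compatible with that usage (assumptions about this project's utils module, not its actual code):

import datetime

def _parse(s):
    return datetime.datetime.strptime(s, '%Y-%m-%d').date()

def daterange(start, end):
    # yield 'YYYY-MM-DD' strings from start to end inclusive
    current = _parse(start)
    while current <= _parse(end):
        yield current.strftime('%Y-%m-%d')
        current += datetime.timedelta(days=1)

def isWeekday(date_str):
    # Monday=0 ... Sunday=6; weekdays are 0-4
    return _parse(date_str).weekday() < 5

def getDayDifference(start, end):
    return (_parse(end) - _parse(start)).days
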
Example #24
import argparse
import datetime as dt
from utils import daterange, fetch_game_listings
# load_game (an async task, e.g. a Celery job given the .delay call below) is
# assumed to be imported elsewhere in the original module


def valid_date(x):
    try:
        return dt.datetime.strptime(x, '%Y-%m-%d')
    except ValueError:
        msg = 'Not a valid date: "{0}"'.format(x)
        raise argparse.ArgumentTypeError(msg)

parser = argparse.ArgumentParser()
parser.add_argument('--start-date', help='First game date to extract',
                    type=valid_date, required=True)
parser.add_argument('--end-date', help='Last game date to extract',
                    type=valid_date, required=False)
parser.add_argument('--refresh',
                    help='Reload the game data, even if the score is final',
                    required=False, action='store_true')
args = parser.parse_args()
print(args)

if args.end_date is None:
    args.end_date = args.start_date

for d in daterange(args.start_date, args.end_date):
    print('Getting listings for {}'.format(d.strftime('%Y-%m-%d')))
    game_ids = fetch_game_listings(d)
    for gid in game_ids:
        load_game.delay(gid, skip_if_final=not args.refresh)
Example #25

class MyURLOpener(urllib.request.FancyURLopener):

    def http_error_default(self, *args, **kwargs):
        urllib.request.URLopener.http_error_default(self, *args, **kwargs)


if __name__ == '__main__':
    parser = utils.incremental_date_range_cmd_line_parser()
    args = parser.parse_args()

    utils.ensure_exists('static/scrape_data')
    os.chdir('static/scrape_data')

    for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                    datetime.date.today()):
        str_date = time.strftime("%Y%m%d", cur_date.timetuple())
        if not utils.includes_day(args, str_date):
            print('skipping', str_date, 'because not in cmd line arg daterange')
            continue
        directory = str_date
        print(str_date)
        games_short_name = str_date + '.all.tar.bz2'
        saved_games_bundle = directory + '/' + games_short_name
        if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
            print('skipping because exists', str_date, saved_games_bundle,
                  'and not small (size=', os.stat(saved_games_bundle).st_size, ')')
            continue
        if not os.path.exists(directory):
            os.mkdir(directory)
        RemoveSmallFileIfExists(saved_games_bundle)
Example #26
def main(parsed_args):
    """Primary update cycle"""

    # Scrape and load the data from goko, proceeding from the
    # previous day backwards, until no games are inserted
    today = datetime.date.today()
    log.info("Starting scrape for raw games")
    dates = utils.daterange(datetime.date(2010,10,14), today, reverse=True)
    for date in dates:
        log.info("Invoking scrape_raw_games async task for %s", date)
        async_result = watch_and_log(background.tasks.scrape_raw_games.s(date))
        inserted = async_result.get()

        if inserted is None:
            log.info("Nothing processed for %s", date)
        elif inserted == 0:
            log.info("No games inserted for %s", date)
            break

    # Invoke the analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Check for goals
    log.info("Starting search for goals acheived")
    # Check for game_stats
    log.info("Starting game_stats summarization")
    dates = utils.daterange(datetime.date(2010,10,14), today, reverse=True)
    for date in dates:
        log.info("Invoking calc_goals_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.calc_goals_for_days.s([date]))
        inserted = async_result.get()

        log.info("Invoking summarize_game_stats_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.summarize_game_stats_for_days.s([date]))
        if inserted == 0:
            log.info("No games parsed for goals on %s", date)
            break


    # Invoke the count_buys script
    log.info("Counting buys")
    count_buys.main(parsed_args)

    # Invoke the run_trueskill script
    log.info("Calculating trueskill")
    run_trueskill.main(parsed_args)

    # Invoke the optimal_card_ratios script
    log.info("Calculating optimal card ratios")
    optimal_card_ratios.main(parsed_args)

    # Invoke the goal_stats script
    log.info("Calculating goal stats")
    goal_stats.main(parsed_args)

    # Invoke the scrape_leaderboard script
    log.info("Scraping the leaderboard")
    scrape_leaderboard.main()

    # Invoke the load_leaderboard script
    log.info("Loading the leaderboard")
    load_leaderboard.main()

    log.info("Starting analyze2") # This is slow. Is it fast enough on cr?
    analyze2.main(parsed_args)
    log.info("Done with the update.py process")
Example #27
import argparse
import datetime as dt
from utils import daterange, fetch_game_listings


def valid_date(x):
    try:
        return dt.datetime.strptime(x, '%Y-%m-%d')
    except ValueError:
        msg = 'Not a valid date: "{0}"'.format(x)
        raise argparse.ArgumentTypeError(msg)


parser = argparse.ArgumentParser()
parser.add_argument('--start-date',
                    help='First game date to extract',
                    type=valid_date,
                    required=True)
parser.add_argument('--end-date',
                    help='Last game date to extract',
                    type=valid_date,
                    required=False)
parser.add_argument('--refresh',
                    help='Reload the game data, even if the score is final',
                    required=False,
                    action='store_true')
args = parser.parse_args()
print(args)

if args.end_date is None:
    args.end_date = args.start_date

for d in daterange(args.start_date, args.end_date):
    print('Getting listings for {}'.format(d.strftime('%Y-%m-%d')))
    game_ids = fetch_game_listings(d)
    for gid in game_ids:
        load_game.delay(gid, skip_if_final=not args.refresh)
Example #28

def CouncilroomGamesCollectionUrl(cur_date):
    host = 'http://councilroom.com/static/scrape_data/'
    return host + FormatDate(COUNCILROOM_FORMAT, cur_date)


def RemoveSmallFileIfExists(fn):
    if (os.path.exists(fn) and os.stat(fn).st_size <= SMALL_FILE_SIZE):
        print('removing small existing file', fn)
        os.unlink(fn)


parser = utils.incremental_date_range_cmd_line_parser()
args = parser.parse_args()

for cur_date in utils.daterange(datetime.date(2010, 10, 15),
                                datetime.date.today()):
    str_date = time.strftime("%Y%m%d", cur_date.timetuple())
    if not utils.IncludesDay(args, str_date):
        print('skipping', str_date, 'because not in cmd line arg daterange')
        continue
    directory = str_date
    print(str_date)
    games_short_name = str_date + '.all.tar.bz2'
    saved_games_bundle = directory + '/' + games_short_name
    if utils.at_least_as_big_as(saved_games_bundle, SMALL_FILE_SIZE):
        print('skipping because exists', str_date, saved_games_bundle,
              'and not small (size=', os.stat(saved_games_bundle).st_size, ')')
    else:
        if not os.path.exists(directory):
            os.mkdir(directory)
        RemoveSmallFileIfExists(saved_games_bundle)
Example #29
        print("No layer data from {}. Will begin from {} instead".format(datetime.strptime(start_date,"%Y-%m-%d"),  datetime.strptime(layer.date_min,"%Y-%m-%d")))
        start_date = datetime.strptime(layer.date_min,"%Y-%m-%d")

print("From {} to {}".format(start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d")))

###############################################################################
# File checking
###############################################################################
assert os.path.isdir(data_dir), "Couldn't find the dataset at {}".format(data_dir)

# Generate labels file if non-existent
if not os.path.exists(labels_file):
    print("Making labels file: {}".format(labels_file))
    with open(labels_file, "w") as f:
        # Loop through dates
        for single_date in daterange(start_date, end_date):
            datestring = single_date.strftime("%Y-%m-%d")
            f.write(datestring + " \n")

###############################################################################
# Add split to the labels file
###############################################################################

# Count the number of examples in the dataset
with open(labels_file) as f:
    N = sum(1 for line in f)
print("There are {} total examples in the dataset".format(N))

# Split the dataset into train-val-test
first_split = int(TRAIN_CUTOFF * N)
second_split = int(VAL_CUTOFF * N)
Example #30
def main(parsed_args):
    """Primary update cycle"""

    # Scrape and load the data from isotropic, proceeding from the
    # current day backwards, until no games are inserted
    log.info("Starting scrape for raw games")
    for date in utils.daterange(datetime.date(2010, 10, 15),
                                datetime.date.today(), reverse=True):
        log.info("Invoking scrape_raw_games async task for %s", date)
        async_result = watch_and_log(background.tasks.scrape_raw_games.s(date))
        inserted = async_result.get()

        if inserted is None:
            log.info("Nothing processed for %s", date)
        elif inserted == 0:
            log.info("No games inserted for %s", date)
            break

    # Invoke the analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Check for goals
    log.info("Starting search for goals acheived")
    for date in utils.daterange(datetime.date(2010, 10, 15),
                                datetime.date.today(), reverse=True):
        log.info("Invoking calc_goals_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.calc_goals_for_days.s([date]))
        inserted = async_result.get()

        if inserted == 0:
            log.info("No games parsed for goals on %s", date)
            break

    # Check for game_stats
    log.info("Starting game_stats summarization")
    for date in utils.daterange(datetime.date(2010, 10, 15),
                                datetime.date.today(), reverse=True):
        log.info("Invoking summarize_game_stats_for_days async task for %s", date)
        async_result = watch_and_log(background.tasks.summarize_game_stats_for_days.s([date]))
        inserted = async_result.get()

        if inserted == 0:
            log.info("No new games summarized on %s", date)
            break

    # Invoke the count_buys script
    log.info("Counting buys")
    count_buys.main(parsed_args)

    # Invoke the run_trueskill script
    log.info("Calculating trueskill")
    run_trueskill.main(parsed_args)

    # Invoke the optimal_card_ratios script
    log.info("Calculating optimal card ratios")
    optimal_card_ratios.main(parsed_args)

    # Invoke the goal_stats script
    log.info("Calculating goal stats")
    goal_stats.main(parsed_args)

    # Invoke the scrape_leaderboard script
    log.info("Scraping the leaderboard")
    scrape_leaderboard.main()

    # Invoke the load_leaderboard script
    log.info("Loading the leaderboard")
    load_leaderboard.main()

    log.info("Done with the update.py process")
Example #31
	def chart_mailsperday(self):
		self.c.execute('SELECT MIN(msg_time), MAX(msg_time) FROM mails')
		firstmail, lastmail = self.c.fetchone()
		x_out = []
		x_in = []
		day_out = {}
		day_in = {}
		days = []
		month = 0
		months = []
		x_months_in = []
		x_months_out = []
		
		mere = re.compile(self.ME)
		
		self.c.execute('SELECT msg_time, msg_from, msg_to, msgid FROM mails WHERE msg_time > 0')
		for row in self.c:
			t = time.localtime(float(row[0]))
			x = date.fromtimestamp(time.mktime(t))
			d = x.isoformat()
			if mere.match(row[1]) is None or mere.match(row[2]) is not None:
				# Mails to myself shouldn't be counted, they are either spam
				# or "irrelevant" for my communication profile
				if d in day_in:
					day_in[d] += 1
				else:
					day_in[d] = 1
			else:
				if d in day_out:
					day_out[d] += 1
				else:
					day_out[d] = 1

		for single_date in daterange(date.fromtimestamp(firstmail), date.fromtimestamp(lastmail)):
			days.append(single_date)
			d = single_date.isoformat()
				
			if d[0:7] != month:
				month = d[0:7]
				months.append(single_date)
				x_months_in.append([0,0])
				x_months_out.append([0,0])
				
			if d in day_in:
				x_in.append(day_in[d])
				x_months_in[len(x_months_in)-1][0] += day_in[d]
			else:
				x_in.append(0)
			if d in day_out:
				x_out.append(day_out[d])
				x_months_out[len(x_months_out)-1][0] += day_out[d]
			else:
				x_out.append(0)
			x_months_in[len(x_months_in)-1][1] += 1
			x_months_out[len(x_months_out)-1][1] += 1
			
		days = md.date2num(days)
		months = md.date2num(months)
		x_months_out = [float(_x[0])/float(_x[1]) for _x in x_months_out]
		x_months_in = [float(_x[0])/float(_x[1]) for _x in x_months_in]
		
		plt.clf()
		ax = plt.subplot(111)
		plt.plot(days, x_out, 'b', label=_('outgoing'))
		plt.plot(days, x_in, 'g', label=_('incoming'))
		plt.title(_("Mails per day"))
		ax.set_ylabel(_("count"))
		ax.xaxis_date()
		plt.axis([min(days), max(days), 0, max((max(x_in), max(x_out)))+5])
		ax.xaxis.set_major_formatter( md.DateFormatter('%m/%Y') )
		labels = ax.get_xticklabels()
		plt.legend(loc=0)
		plt.setp(labels, rotation=30, fontsize=10)
		plt.savefig(self.FILEPREFIX+"perday.png")
		self.charts.append(self.FILEPREFIX+"perday.png")
		
		plt.clf()
		ax = plt.subplot(111)
		plt.plot(months, x_months_out, 'b', label=_('outgoing'))
		plt.plot(months, x_months_in, 'g', label=_('incoming'))
		plt.title(_("Mails per day (averaged by month)"))
		ax.set_ylabel(_("count"))
		ax.xaxis_date()
		plt.axis([min(months), max(months), 0, max((max(x_months_in), max(x_months_out)))+5])
		ax.xaxis.set_major_formatter( md.DateFormatter('%m/%Y') )
		labels = ax.get_xticklabels()
		plt.legend(loc=0)
		plt.setp(labels, rotation=30, fontsize=10)
		plt.savefig(self.FILEPREFIX+"perday.avg.png")
		self.charts.append(self.FILEPREFIX+"perday.avg.png")
		
		plt.clf()
		ax = plt.subplot(111)
		plt.hist([x_out, x_in], bins=100, color=['b', 'g'], label=[_('outgoing'), _('incoming')], histtype='barstacked', log=False)
		plt.title(_("Mails per day (distribution)"))
		ax.set_xlabel(_("count"))
		ax.set_ylabel(_("days"))
		plt.xlim(0, 50)
		plt.legend(loc=0)
		plt.savefig(self.FILEPREFIX+"perday.hist.png")
		self.charts.append(self.FILEPREFIX+"perday.hist.png")
Example #32
def apply_trace(figure, before, date, code, row, col, rows, cols, layout,
                options):
    args = create_args(code)
    end = date
    start = utils.to_format(
        utils.to_datetime(end) - utils.relativeterm(int(before)))

    simulator_data = strategy.load_simulator_data(
        args.code,
        start,
        end,
        args,
        names=["average"] if options["use_multi"] else [])

    df = simulator_data.middle
    df = df[df["date"] >= start]
    df = df.reset_index()

    # bullish candles -> candle.data[1], bearish candles -> candle.data[0]
    candle = FF.create_candlestick(df["open"],
                                   df["high"],
                                   df["low"],
                                   df["close"],
                                   dates=df["date"])
    stocks = list(candle.data)
    figure = add_stats(figure, row, col, stocks, df, [])  # apply the candlesticks to the figure

    # list of dates
    dates = list(map(lambda x: utils.to_datetime(x), df["date"].astype(str)))
    rangebreaks = [{
        "values":
        list(
            filter(lambda x: not utils.is_weekday(x),
                   utils.daterange(dates[0], dates[-1])))
    }]  # exclude non-trading days
    daterange = [dates[0], dates[-1]]

    layout_settings = create_layout_settings(options)

    if options["use_multi"]:
        setting = layout_settings[0]
        setting["line"] = [setting["line"][0]]  # candle with average
        layout_settings = [setting]

    # ===================================================================
    # populate each subplot with data according to layout_settings
    domain_from = 0
    for setting in sorted(layout_settings, key=lambda x: x["to"]):
        i = row if options["use_multi"] else setting["id"]
        label_i = ((col - 1) + (row - 1) * cols) + 1 if options["use_multi"] else setting["id"]

        # in multi mode the same legend entry would repeat for each subplot, so enable it only for the first one
        showlegend = (layout["showlegend"] and row == 1
                      and col == 1) if options["use_multi"] else layout["showlegend"]

        figure = apply_fig_setting(figure,
                                   i,
                                   col,
                                   df,
                                   setting["bar"],
                                   mode="bar",
                                   showlegend=showlegend)
        figure = apply_fig_setting(figure,
                                   i,
                                   col,
                                   df,
                                   setting["line"],
                                   mode="lines",
                                   showlegend=showlegend)
        figure = apply_fig_setting(figure,
                                   i,
                                   col,
                                   df,
                                   setting["marker"],
                                   mode="markers",
                                   showlegend=showlegend)

        xlabel = "xaxis%s" % label_i
        layout[xlabel] = {
            "range": daterange,
            "rangebreaks": rangebreaks,
            "scaleanchor": "x1"
        }

        if not options["use_multi"]:
            ylabel = "yaxis%s" % label_i
            layout[ylabel] = {"domain": [domain_from, setting["to"]]}
            layout[xlabel]["domain"] = [0.0, 1.0]

        domain_from = setting["to"]

    return figure, layout