Code Example #1
File: test_pybats.py  Project: asher-m/spacepy
import os
import unittest

import spacepy_testing

class TestParseFileTime(unittest.TestCase):
    '''
    Test the parse_filename_time function, which attempts to extract
    the datetime/runtime/run iteration from a standard SWMF file name.
    '''

    from datetime import datetime as dt

    files = [
        'mag_grid_e20130924-232600.out', 'y=0_mhd_1_e20130924-220500-054.out',
        'y=0_mhd_2_t00001430_n00031073.out',
        'z=0_mhd_2_t00050000_n00249620.out',
        os.path.join(spacepy_testing.datadir, 'pybats_test',
                     'mag_grid_ascii.out'),
        'y=0_mhd_1_t20140410000000_n00001500.out',
        'z=0_mhd_2_e20140410-000000-000_20140410-000300-000.outs',
        'z=0_mhd_2_n00001500_00001889.outs'
    ]
    iters = [None, None, 31073, 249620, None, 1500, None, [1500, 1889]]
    times = [None, None, 870, 18000, None, None, None, None]
    dates = [
        dt(2013, 9, 24, 23, 26, 0),
        dt(2013, 9, 24, 22, 5, 0), None, None, None,
        dt(2014, 4, 10, 0, 0),
        [dt(2014, 4, 10, 0, 0, 0),
         dt(2014, 4, 10, 0, 3, 0)], None
    ]

    def testParse(self):
        from spacepy.pybats import parse_filename_time
        for f, d, t, i in zip(self.files, self.dates, self.times, self.iters):
            self.assertEqual(parse_filename_time(f), (i, t, d))
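
For reference, the tuple order asserted above is (iteration, runtime, datetime); a minimal direct call against the first filename in the list:

from spacepy.pybats import parse_filename_time

i, t, d = parse_filename_time('mag_grid_e20130924-232600.out')
# Only a datetime is embedded in this name, so the iteration and
# runtime slots come back as None:
# (None, None, datetime(2013, 9, 24, 23, 26))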
Code Example #2
def toYearFraction(date):
    """
    DISCONTINUED
    use dt2year_decimal instead (which is the same)
    """
    #
    # Give the decimal year
    # source :
    # http://stackoverflow.com/questions/6451655/python-how-to-convert-datetime-dates-to-decimal-years

    from datetime import datetime as dt
    import time

    def sinceEpoch(date):  # returns seconds since epoch
        return time.mktime(date.timetuple())

    s = sinceEpoch

    year = date.year
    startOfThisYear = dt(year=year, month=1, day=1)
    startOfNextYear = dt(year=year + 1, month=1, day=1)

    yearElapsed = s(date) - s(startOfThisYear)
    yearDuration = s(startOfNextYear) - s(startOfThisYear)
    fraction = yearElapsed / yearDuration

    return date.year + fraction
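
A quick sanity check of the fraction arithmetic (note that time.mktime works in local time, so DST transitions can shift results slightly):

from datetime import datetime as dt

print(toYearFraction(dt(2013, 1, 1)))      # 2013.0 (start of year)
print(toYearFraction(dt(2013, 7, 2, 12)))  # ~2013.5 (182.5 of 365 days elapsed)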
Code Example #3
def test_pruning(self):
    raw_data = pd.read_csv("./Input/mdata_5-1.txt")
    cols = raw_data.columns
    X = raw_data[cols[1:-1]]
    y = raw_data[cols[-1]]

    # pre-pruning: require at least 4 samples per leaf
    clf = dt(criterion="gain_ratio", min_samples_leaf=4)
    clf.fit(X, y)
    print(clf.tree)
    print(clf.num_leaf)
    # relax the threshold to 3 samples per leaf for comparison
    clf = dt(criterion="gain_ratio", min_samples_leaf=3)
    clf.fit(X, y)
    print(clf.tree)
    print(clf.num_leaf)
Code Example #4
def toYearFraction(date):
    import time
    def sinceEpoch(date): # returns seconds since epoch
        return time.mktime(date.timetuple())
    s = sinceEpoch

    year = date.year
    startOfThisYear = dt(year=year, month=1, day=1)
    startOfNextYear = dt(year=year+1, month=1, day=1)

    yearElapsed = s(date) - s(startOfThisYear)
    yearDuration = s(startOfNextYear) - s(startOfThisYear)
    fraction = yearElapsed/yearDuration

    return date.year + fraction
Code Example #5
File: unit_test.py  Project: wjq8421/MLNote
    def test_q51(self):
        raw_data = pd.read_csv("./Input/data_5-1.txt")
        cols = raw_data.columns
        X = raw_data[cols[1:-1]]
        y = raw_data[cols[-1]]
        # criterion: gain_ratio
        clf = dt(criterion="gain_ratio")
        clf.fit(X, y)
        logger.info("gain_ratio")
        rst = {
            '有自己的房子': {
                '否': {
                    '有工作': {
                        '否': {
                            '否': None
                        },
                        '是': {
                            '是': None
                        }
                    }
                },
                '是': {
                    '是': None
                }
            }
        }
        self.assertEqual(rst, clf.tree)
        logger.info(clf.tree)
Code Example #6
def test_q51():
    raw_data = pd.read_csv("./Input/data_5-1.txt")
    cols = raw_data.columns
    X = raw_data[cols[1:-1]]
    y = raw_data[cols[-1]]
    # criterion: gain_ratio
    clf = dt(criterion="gain_ratio")
    clf.fit(X, y)
    print("gain_ratio")
    # expected tree, kept for visual comparison with the printed result
    rst = {
        '有自己的房子': {
            '否': {
                '有工作': {
                    '否': {
                        '否': None
                    },
                    '是': {
                        '是': None
                    }
                }
            },
            '是': {
                '是': None
            }
        }
    }
    print(clf.tree)
Code Example #7
def test_e54():
    raw_data = pd.read_csv("./Input/mdata_5-1.txt")
    cols = raw_data.columns
    X = raw_data[cols[1:-1]]
    y = raw_data[cols[-1]]
    clf = dt()
    clf.fit(X, y)
    print(clf.tree)
Code Example #8
File: unit_test.py  Project: wjq8421/MLNote
    def test_e54(self):
        raw_data = pd.read_csv("./Input/mdata_5-1.txt")
        cols = raw_data.columns
        X = raw_data[cols[1:-1]]
        y = raw_data[cols[-1]]

        clf = dt()
        clf.fit(X, y)
        logger.info(clf.tree)
Code Example #9
def temp_monthly():
    """Temperature observartion for Top Station for last year"""

    session = Session(engine)

    #find last date in database from Measurements
    last_date = session.query(Measurement.date).order_by(Measurement.date.desc()).first().date

    #convert last date string to date
    last_date = dt.datetime.strptime(last_date, "%Y-%m-%d")

    #calculate the date one year before the last date using datetime's timedelta
    first_date = last_date - dt.timedelta(days=365)

    #list the stations and their counts in descending order
    station_counts = session.query(Measurement.station, func.count(Measurement.station)).\
        group_by(Measurement.station).\
        order_by(func.count(Measurement.station).desc()).all()

    #pull the top station name out of the first (station, count) tuple
    top_station = station_counts[0][0]

    #calculate the lowest, highest, and average temperature recorded at the most active station
    session.query(func.min(Measurement.tobs), func.max(Measurement.tobs), func.avg(Measurement.tobs)).\
    filter(Measurement.station == top_station).all()

    #query the last 12 months of temperature observation data for this station
    top_station_year_obs = session.query(Measurement.tobs).\
        filter(Measurement.station == top_station).\
        filter(Measurement.date >= first_date).all()
    return jsonify(list(np.ravel(top_station_year_obs)))

@app.route("/api/v1/0/temp/<start>")
@app.route("/api/v1.0/temp/<start>/<end>")
def stats(start=None, end=None):
    session = Session(engine)

    """Return Temperature Min, Temperature Avg, Temperature Max"""

    #select statement
    sel = [func.min(Measurement.tobs), func.avg(Measurement.tobs), func.max(Measurement.tobs)]

    if not end:
        #calculate Temperature Min, Temperature Avg, Temperature Max for dates greater than start
        results = session.query(*sel).\
            filter(Measurement.date >= start).all()
        #Ravel results into a 1D array and convert to a list
        temps = list(np.ravel(results))
        return jsonify(temps)

    #calculate Temperature Min, Temperature Avg, Temperature Max between start and end
    results = session.query(*sel).\
        filter(Measurement.date >= start).\
        filter(Measurement.date <= end).all()
    #Ravel results into a 1D array and convert to a list
    temps = list(np.ravel(results))
    return jsonify(temps)

if __name__ == '__main__':
    app.run()
Code Example #10
def test_predict(self):
    raw_data = pd.read_csv("./Input/mdata_5-1.txt")
    cols = raw_data.columns
    X = raw_data[cols[1:-1]]
    y = raw_data[cols[-1]]

    clf = dt(criterion="gain_ratio")
    clf.fit(X, y)
    rst = clf.predict(X[:1])
    self.assertEqual(rst, y[:1].values)
    print("predict: ", rst, "label: ", y[:1])
Code Example #11
import unittest

class TestParseFileTime(unittest.TestCase):
    '''
    Test the parse_filename_time function, which attempts to extract
    the datetime/runtime/run iteration from a standard SWMF file name.
    '''

    from datetime import datetime as dt

    files = [
        'mag_grid_e20130924-232600.out', 'y=0_mhd_1_e20130924-220500-054.out',
        'y=0_mhd_2_t00001430_n00031073.out',
        'z=0_mhd_2_t00050000_n00249620.out'
    ]
    dates = [dt(2013, 9, 24, 23, 26, 0), dt(2013, 9, 24, 22, 5, 0), None, None]
    times = [None, None, 870, 18000]
    iters = [None, None, 31073, 249620]

    def testParse(self):
        from spacepy.pybats import parse_filename_time
        for f, d, t, i in zip(self.files, self.dates, self.times, self.iters):
            self.assertEqual(parse_filename_time(f), (i, t, d))
Code Example #12
File: tasks.py  Project: fengxia41103/jk
    def parser(self, f, output=None):
        try:
            book = xlrd.open_workbook(f)
        except:
            logger.error('%s error' % f)
            return

        sh = book.sheet_by_index(0)
        if sh.name != u'账户对账单':
            logger.error(f)

        # range for 账户对账单
        first_col_vals = [sh.cell_value(rowx=i, colx=0)
                          for i in range(sh.nrows)]
        start = end = None
        for idx, val in enumerate(first_col_vals):
            if val == u'对帐单':
                start = idx
            if val == u'当日持仓清单':
                end = idx
            if start and end:
                break

        vals = []
        for row in xrange(start, end):
            vals.append([sh.cell_value(rowx=row, colx=c)
                         for c in range(sh.ncols)])

        for row in filter(lambda x: u'20' in x[0], vals):
            symbol = row[3]
            transaction_type = row[1]
            price = float(row[7])
            vol = int(row[5])
            total = float(row[8])
            name = row[4]

            # timestamp: row[0] is a 'YYYY-MM-DD' date string
            yr = int(row[0][:4])
            m = int(row[0][5:7])
            d = int(row[0][-2:])
            executed_on = dt(year=yr, month=m, day=d)

            c = MyChenmin(
                executed_on=executed_on,
                transaction_type=transaction_type,
                symbol=symbol,
                name=name,
                price=price,
                vol=vol,
                total=total
            )
            c.save()
        logger.debug('%s done' % f)
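
The slice-based timestamp parsing above assumes a fixed 'YYYY-MM-DD' layout. A sketch of a more tolerant variant using strptime (parse_trade_date is a hypothetical helper, not part of the project):

from datetime import datetime

def parse_trade_date(raw):
    # Accept both '2013-09-24' and '2013/09/24' style stamps.
    return datetime.strptime(raw.replace('/', '-'), '%Y-%m-%d')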
Code Example #13
File: tasks.py  Project: fengxia41103/jk
    def parser(self, symbol, val_list):
        logger.info('processing %s' % symbol)
        stock = MyStock.objects.get(symbol=symbol)
        records = []
        cnt = 0
        total = len(val_list)
        exec_start = time.time()  # time the whole run, not just the last row
        for vals in val_list:
            if len(vals) < 10:
                logger.error('wrong length %s' % ','.join(vals))
                continue  # skip malformed rows instead of indexing past the end

            date_stamp = dt(year=int(vals[1][:4]), month=int(
                vals[1][4:6]), day=int(vals[1][-2:]))
            open_p = Decimal(vals[2])
            high_p = Decimal(vals[3])
            low_p = Decimal(vals[4])
            close_p = Decimal(vals[5])
            vol = Decimal(vals[6])
            amount = Decimal(vals[7]) * Decimal(10.0)
            adj = Decimal(vals[8])
            status = int(vals[9])

            h = MyStockHistorical(
                stock=stock,
                date_stamp=date_stamp,
                open_price=open_p,
                high_price=high_p,
                low_price=low_p,
                close_price=close_p,
                vol=vol,
                amount=amount,
                status=status,

                # adjusted values
                adj_open=open_p * adj,
                adj_high=high_p * adj,
                adj_low=low_p * adj,
                adj_close=close_p * adj,
            )
            records.append(h)
            if len(records) >= 1000:
                MyStockHistorical.objects.bulk_create(records)
                cnt += len(records)
                records = []
                logger.info('%s inserted %d/%d' % (symbol, cnt, total))
        if len(records):
            MyStockHistorical.objects.bulk_create(records)
        logger.info('%s elapse %f' % (symbol, time.time() - exec_start))
Code Example #14
def precipitation():
    """Last year of Precipitation Data"""
    session = Session(engine)

    """Return a list of the dates and precipitation from last year"""
    # Query for the dates and precipitation from last year
    last_date = session.query(Measurement.date).order_by(Measurement.date.desc()).first().date


    #convert the last date string to a date
    last_date = dt.datetime.strptime(last_date, "%Y-%m-%d")

    #calculate the date one year before the last date using datetime's timedelta
    first_date = last_date - dt.timedelta(days=365)

    #perform a query to retrieve the dates and precipitation scores
    last_year_data = session.query(Measurement.date, Measurement.prcp).filter(Measurement.date >= first_date).all()
    #convert the (date, prcp) row tuples into a JSON-serializable dict
    return jsonify(dict(last_year_data))
Code Example #15
import dash_table  # left unaliased: an `as dt` alias would be shadowed by the datetime import below
import dash_core_components as dcc
import dash_html_components as html
from datetime import datetime as dt


def resolve_entry_form(selections):
    for selection in selections:
        if selection is None:
            return
    return entry_layout


entry_layout = html.Div(children=[
    html.Div(html.Div(children=[
        html.P('Entry Date'),
        dcc.DatePickerSingle(id='entry-date-picker',
                             min_date_allowed=dt(2015, 1, 1),
                             max_date_allowed=dt.today(),
                             initial_visible_month=dt(dt.today().year,
                                                      dt.today().month, 1),
                             display_format='M/D/YYYY')
    ]),
             className='two columns offset-by-one column'),
    html.Div(html.Div(children=[
        html.P('Strike Price'),
        dcc.Input(id='entry-strike-price',
                  type='number',
                  placeholder='Enter strike price...')
    ]),
             className='two columns'),
    html.Div(html.Div(children=[
        html.P('Ticker'),
Code Example #16
File: data_import.py  Project: fengxia41103/jk
def import_chenmin_csv():
    root = '/home/fengxia/Desktop/chenmin/alpha'
    for f in os.listdir(root):
        symbol, ext = os.path.splitext(os.path.basename(f))
        stock, created = MyStock.objects.get_or_create(symbol=symbol)
        his = [x.isoformat() for x in MyStockHistorical.objects.filter(
            stock=stock).values_list('date_stamp', flat=True)]
        records = []

        with open(os.path.join(root, f), 'rb') as csvfile:
            for cnt, vals in enumerate(csv.reader(csvfile)):
                if not vals:
                    continue  # handle blank lines

                # some timestamps are in the form "x/x/x"; normalize them to
                # the "x-x-x" format
                vals[0] = vals[0].replace('/', '-')
                if len(vals) != 6:
                    print 'error in %s' % symbol
                    print cnt, vals
                    raw_input()
                elif '-' not in vals[0]:
                    continue  # skip these title lines

                stamp = [int(v) for v in vals[0].split('-')]
                date_stamp = dt(year=stamp[0], month=stamp[1], day=stamp[2])

                if date_stamp.date().isoformat() in his:
                    continue  # we already have these
                else:
                    try:
                        open_p = Decimal(vals[1])
                    except:
                        open_p = Decimal(-1)
                    try:
                        high_p = Decimal(vals[2])
                    except:
                        high_p = Decimal(-1)
                    try:
                        low_p = Decimal(vals[3])
                    except:
                        low_p = Decimal(-1)
                    try:
                        close_p = Decimal(vals[4])
                    except:
                        close_p = Decimal(-1)
                    try:
                        vol = int(vals[5]) / 1000.0
                    except:
                        vol = -1
                    try:
                        adj_p = Decimal(vals[6])
                    except:
                        adj_p = Decimal(-1)
                    h = MyStockHistorical(
                        stock=stock,
                        date_stamp=date_stamp,
                        open_price=open_p,
                        high_price=high_p,
                        low_price=low_p,
                        close_price=close_p,
                        vol=vol,
                        adj_close=adj_p
                    )
                    records.append(h)
                    if len(records) >= 1000:
                        MyStockHistorical.objects.bulk_create(records)
                        records = []
        if len(records):
            MyStockHistorical.objects.bulk_create(records)

        # persist
        print '[%s] complete' % symbol
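
The six near-identical try/except blocks above could be collapsed into a small helper; a sketch (safe_decimal is hypothetical, not part of the project):

from decimal import Decimal, InvalidOperation

def safe_decimal(vals, idx, default=Decimal(-1)):
    # Mirror the original behavior: a missing or unparsable field becomes -1.
    # IndexError covers rows with only six fields, which have no vals[6].
    try:
        return Decimal(vals[idx])
    except (IndexError, InvalidOperation, TypeError, ValueError):
        return default

Each block such as `open_p = Decimal(vals[1])` then becomes `open_p = safe_decimal(vals, 1)`.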
Code Example #17
    def get_business_above_series_c_or_ipo_merger_date(self,
                                                       permalink,
                                                       company_js=None):
        if not company_js:
            company_js = EntityParser.LoadJsonEntity('data/company/' +
                                                     permalink)

        if company_js:
            deadpooled_year = company_js['deadpooled_year']
            deadpooled_month = company_js['deadpooled_month']
            if deadpooled_year and deadpooled_month:
                return dt(deadpooled_year, deadpooled_month, 1), 'Dead'
            funding_rounds = company_js.get('funding_rounds')
            label_date = None
            last_funding_round = None
            if funding_rounds:
                for round in funding_rounds:
                    if round['round_code'] not in [
                            'angel', 'seed', 'a', 'b', 'unattributed'
                    ]:
                        round_year = round['funded_year']
                        round_month = round['funded_month']
                        round_day = round['funded_day']
                        try:
                            round_date = dt(round_year, round_month, round_day)
                        except:
                            continue
                        if label_date is not None:
                            if label_date > round_date:
                                label_date = round_date
                                last_funding_round = round['round_code']
                        else:
                            last_funding_round = round['round_code']
                            label_date = round_date
                if label_date is not None:
                    return label_date, last_funding_round

            acquisition = company_js.get('acquisition', {})
            ipos = company_js.get('ipo', {})
            if acquisition:

                acq_year = acquisition['acquired_year']
                acq_month = acquisition['acquired_month']
                acq_day = acquisition['acquired_day']
                try:
                    acq_date = dt(acq_year, acq_month, acq_day)
                    if label_date is not None:
                        if label_date > acq_date:
                            label_date = acq_date
                    else:
                        label_date = acq_date
                    if label_date is not None:
                        return label_date, 'acquired'
                except:
                    pass

            if ipos:
                ipo_year = ipos['pub_year']
                ipo_month = ipos['pub_month']
                ipo_day = ipos['pub_day']
                try:
                    ipo_date = dt(ipo_year, ipo_month, ipo_day)
                    if label_date is not None:
                        if label_date > ipo_date:
                            label_date = ipo_date
                    else:
                        label_date = ipo_date
                    if label_date is not None:
                        return label_date, 'ipo'
                except:
                    pass

            return label_date, None
Code Example #18
    def extract_company_feature(self, company_file):
        company_js = EntityParser.LoadJsonEntity(company_file)
        if company_js is None:
            return None
        features = self.get_feature_template()
        founded_year = company_js['founded_year']
        founded_month = company_js['founded_month'] if company_js[
            'founded_month'] else 1
        founded_day = company_js['founded_day'] if company_js[
            'founded_day'] else 1
        try:
            founded_date = dt(founded_year, founded_month, founded_day).date()
        except TypeError:
            return None

        funding_rounds = company_js['funding_rounds']
        total_funding = 0
        if funding_rounds:
            max_fund_code = 0
            max_funding_round = None
            for round in funding_rounds:
                code = round['round_code']
                raised_amt = round['raised_amount']
                if raised_amt:
                    total_funding += raised_amt
                if round['round_code'] in funding_stages.keys():
                    year = round['funded_year']
                    month = round['funded_month']
                    day = round['funded_day'] if round['funded_day'] else 1
                    try:
                        fund_date = dt(year, month, day)
                    except:
                        continue

                    date_diff = relativedelta.relativedelta(
                        fund_date, founded_date)
                    months_diff = (date_diff.years * 12) + date_diff.months
                    if features[code + '_months']:
                        if features[code + '_months'] < months_diff:
                            features[code + '_months'] = months_diff
                    else:
                        features[code + '_months'] = months_diff

                    if features[code + '_raised_amount']:
                        if raised_amt:
                            features[code + '_raised_amount'] += raised_amt
                    else:
                        features[code + '_raised_amount'] = raised_amt
        try:
            features['investment_per_funding_round'] = total_funding / len(
                funding_rounds)
        except ZeroDivisionError:
            features['investment_per_funding_round'] = 0
        company_website = company_js['homepage_url']
        features['no_of_employees'] = company_js['number_of_employees']
        features['com_domain'] = True if '.com' in company_website else False
        features['number_of_milestone'] = len(company_js.get('milestones', []))
        features['number_of_offices'] = len(company_js['offices'])
        features['category'] = company_js['category_code']
        features['number_of_providers'] = len(company_js['providerships'])
        features['number_of_competitors'] = len(company_js['competitions'])
        features['number_of_products'] = len(company_js['products'])
        features['number_of_funding_rounds'] = len(funding_rounds)
        features['number_of_investments'] = len(company_js['investments'])
        features['headquarter_location'] = self.find_company_headquarter(
            company_js)
        features['number_of_co_founder'] = self.get_num_of_co_founders(
            company_js)

        company_permalink = company_js['permalink']

        for competitor in company_js['competitions']:
            comp = competitor['competitor']
            permalink = comp['permalink']
            got_series_c = self.find_business_above_series_c_or_ipo_merger(
                permalink)
            if got_series_c:
                if features['number_of_competitors_got_series_c'] is None:
                    features['number_of_competitors_got_series_c'] = 1
                else:
                    features['number_of_competitors_got_series_c'] += 1

        features['label'] = self.find_business_above_series_c_or_ipo_merger(
            '', company_js)
        features['label_date'], features[
            'label_stage'] = self.get_business_above_series_c_or_ipo_merger_date(
                '', company_js)
        company_age = relativedelta.relativedelta(features['label_date'],
                                                  founded_date)
        company_age_months = (company_age.years * 12) + company_age.months
        features['company_age_months'] = company_age_months
        features['name'] = company_js['name']
        features['permalink'] = company_js['permalink']
        features[
            'number_of_tech_crunch_article'] = self.get_tech_crunch_articles_count(
                company_permalink, features['label_date'])

        no_of_phd, no_of_financial, no_of_engineer, \
            no_of_companies_by_founder, \
            no_of_successful_company_by_founder = self.get_team_background(company_js)

        features['number_of_financial_background'] = no_of_financial
        features['number_of_engineering_background'] = no_of_engineer
        features['number_of_phd'] = no_of_phd
        features['number_of_companies_by_founder'] = no_of_companies_by_founder
        features[
            'successful_companies_by_founder'] = no_of_successful_company_by_founder
        return features
Code Example #19
def main():
    """
    *********************************************
    Extract and clean data from nyc open data
    *********************************************
    """
    APP_TOKEN = app_token()
    base_url = "https://data.cityofnewyork.us/resource/h9gi-nx95.json?$$app_token={}".format(
        APP_TOKEN)
    url = base_url + "{}"
    cnt_url = base_url + "{}{}"  # select , where

    where_inj = "&$where=number_of_cyclist_injured>0.0&$limit=50000"
    where_kill = "&$where=number_of_cyclist_killed>0.0"

    inj_df = pd.read_json(url.format(where_inj))
    killed_df = pd.read_json(url.format(where_kill))

    def dt(date, time):
        date = pd.to_datetime(date).dt.date
        time = pd.to_datetime(time).dt.time
        return date, time

    # so frustrating. NYC open data changed columns from "accident" to "crash"

    killed_df.crash_date, killed_df.crash_time = dt(killed_df.crash_date,
                                                    killed_df.crash_time)
    inj_df.crash_date, inj_df.crash_time = dt(inj_df.crash_date,
                                              inj_df.crash_time)

    killed_df = killed_df.rename(columns={
        'crash_date': 'accident_date',
        'crash_time': 'accident_time'
    })
    inj_df = inj_df.rename(columns={
        'crash_date': 'accident_date',
        'crash_time': 'accident_time'
    })

    df = (pd.concat([
        inj_df, killed_df
    ]).drop(columns='location').drop_duplicates().reset_index(drop=True))
    df.vehicle_type_code1 = df.vehicle_type_code1.apply(
        lambda x: str(x).upper())
    df.vehicle_type_code2 = df.vehicle_type_code2.apply(
        lambda x: str(x).upper())

    df['Accident Year'] = df.accident_date.apply(lambda x: x.year)
    df['Accident Month'] = df.accident_date.apply(lambda x: x.month)
    df['Accident Hour'] = df.accident_time.apply(lambda x: x.hour)

    def create_df(group):
        return (df.groupby(group).collision_id.count().reset_index().rename(
            columns={'collision_id': 'Number of Accidents'}))

    """
    *********************************************
    Create figures for month and hour data
    *********************************************
    """

    crash_mo_yr = create_df(['Accident Year', 'Accident Month'])
    crash_hr = create_df('Accident Hour')
    crash_mo_hr = create_df(['Accident Month', 'Accident Hour'])

    killed_df['accident_year'] = killed_df.accident_date.apply(
        lambda x: x.year)
    killed_df['accident_month'] = killed_df.accident_date.apply(
        lambda x: x.month)
    killed_df['accident_hr'] = killed_df.accident_time.apply(lambda x: x.hour)

    mo_fig = px.area(crash_mo_yr,
                     x="Accident Month",
                     y="Number of Accidents",
                     animation_frame="Accident Year",
                     range_y=[0, 800],
                     range_x=[1, 12])
    mo_fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1000
    mo_fig.layout.title = "Bicycle Accidents by Month for Each Year"

    pio.write_html(mo_fig, file="app/static/mo_fig.html", auto_play=False)

    hr_fig = px.area(crash_mo_hr,
                     x="Accident Hour",
                     y="Number of Accidents",
                     animation_frame="Accident Month",
                     range_y=[0, 400],
                     range_x=[0, 23])
    hr_fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1000
    hr_fig.layout.title = "Bicycle Accidents by Hour For Each Month"

    pio.write_html(hr_fig, file="app/static/hr_fig.html", auto_play=False)
    """
    *********************************************
    Extract data from citibike files - all trips
    *********************************************
    """

    fdir = './agg_trip'
    agg_files = os.listdir(fdir)
    agg_df = pd.read_csv(fdir + '/' + agg_files[0]).iloc[:, [0, 1]]
    for i in range(1, len(agg_files)):
        agg_df = agg_df.append(
            pd.read_csv(fdir + '/' + agg_files[i]).iloc[:, [0, 1]])
    agg_df.Date = pd.to_datetime(agg_df.Date).dt.date
    agg_df = agg_df.rename(columns={
        'Trips over the past 24-hours (midnight to 11:59pm)':
        'Number of Trips'
    })
    agg_df = agg_df.sort_values('Date')

    fig = px.line(agg_df,
                  x='Date',
                  y='Number of Trips',
                  title="Number of CitiBike Trips by Day",
                  hover_name='Date')
    pio.write_html(fig, file="app/static/fig.html", auto_play=False)
    """
    *********************************************
    Using 9/25/2019 to map common citibike routes
    *********************************************
    """

    high_day = pd.read_csv('./app/static/high_day.csv')
    coord092519 = high_day[[
        'start station name', 'start station id', 'start station latitude',
        'start station longitude', 'end station name', 'end station id',
        'end station latitude', 'end station longitude'
    ]].copy()
    coord092519['id'] = (coord092519['start station name'] +
                         coord092519['end station name'])
    coord092519 = coord092519.groupby([
        'start station name', 'start station id', 'start station latitude',
        'start station longitude', 'end station name', 'end station id',
        'end station latitude', 'end station longitude'
    ]).id.count().reset_index()

    coord092519['filt'] = coord092519.apply(
        lambda x: 'y'
        if x['start station name'] == x['end station name'] else '',
        axis=1)
    coord092519 = coord092519[coord092519.filt != 'y'].reset_index(drop=True)

    cohort = coord092519[coord092519.id >= 4]
    cohort = cohort.rename(columns={'id': 'count'})
    cohort['id'] = cohort['start station id'].apply(
        str) + '-' + cohort['end station id'].apply(str)

    routes = pd.read_csv('./app/static/backup_route_file.csv')
    routes = routes[
        routes.geojson != '{"message":"Too Many Requests"}'].reset_index(
            drop=True)

    cohort_df = pd.merge(cohort,
                         routes[['id', 'geojson']],
                         on='id',
                         how='inner')
    cohort_df = cohort_df[['geojson']].drop_duplicates()

    geojson = list(cohort_df.geojson)
    gjson = []
    for i in range(len(geojson)):
        gjson.append(
            json.loads(geojson[i])['routes'][0]['geometry']['coordinates'])

    for i in gjson:
        for j in i:
            j.reverse()
    """
    *********************************************
    mapping the accidents
    *********************************************
    """

    loc_df = df[[
        'borough', 'latitude', 'longitude', 'on_street_name',
        'off_street_name', 'accident_date'
    ]].copy()
    loc_df = loc_df[(pd.isna(loc_df.latitude) == False)
                    & (loc_df.latitude != 0) & (loc_df.longitude != 0)]
    loc_df.on_street_name = loc_df.on_street_name.str.strip()
    loc_df.off_street_name = loc_df.off_street_name.str.strip()
    loc_df.accident_date = loc_df.accident_date.apply(str)
    loc_df['lat_lon_list'] = loc_df.apply(lambda x: [x.longitude, x.latitude],
                                          axis=1)
    loc_df = loc_df.sort_values('accident_date').reset_index(drop=True)

    intersect_df = loc_df.copy()
    intersect_df[
        'intersection'] = intersect_df.on_street_name + ';' + intersect_df.off_street_name
    intersect_df.intersection = intersect_df.intersection.apply(
        lambda x: ' & '.join(sorted(x.split(';')))
        if pd.isna(x) == False else x)

    dang_int = (intersect_df.groupby(
        ['borough',
         'intersection'])['accident_date'].count().reset_index().sort_values(
             'accident_date', ascending=False).rename(
                 columns={'accident_date': 'Number of Bike Accidents'}))

    # For the table
    dang_int_viz = (dang_int[dang_int['Number of Bike Accidents'] >= 10].copy(
    ).reset_index(drop=True).rename(columns={
        'borough': 'Borough',
        'intersection': 'Intersection'
    }))

    for i in dang_int_viz.index:
        Crash(
            dang_int_viz.iloc[i].Borough,
            dang_int_viz.iloc[i].Intersection).create_map().save(
                'app/static/crash_maps/' + dang_int_viz.iloc[i].Borough +
                dang_int_viz.iloc[i].Intersection.replace(' ', '_') + '.html')

    dang_int_viz.Intersection = dang_int_viz.apply(
        lambda x: '<a href={} target="iframe_map">{}</a>'.format(
            '../static/crash_maps/' + x.Borough + x.Intersection.replace(
                ' ', '_') + '.html', x.Intersection),
        axis=1)

    html = """<table border="1" class="dataframe">
    <thead>
    <tr style="text-align: right;">
    <th>Borough</th>
    <th>Intersection</th>
    <th>Number of Bike Accidents</th>
    </tr>
    </thead>
    <tbody>
    """
    for i in dang_int_viz.index:
        html = (html + '<tr><td>' + dang_int_viz.iloc[i].Borough +
                '</td><td>' + dang_int_viz.iloc[i].Intersection + '</td><td>' +
                str(dang_int_viz.iloc[i]['Number of Bike Accidents']) +
                '</td></tr>')
    html = html + "</tbody></table>"
    html = BeautifulSoup(html, "lxml")

    html.body.insert(
        0,
        BeautifulSoup('<link rel="stylesheet" href="/static/style.css">',
                      "lxml"))

    with open('app/static/crash_table.html', 'w') as f:
        f.write(str(html))

    lat_lon = intersect_df[['intersection', 'lat_lon_list']].copy()
    lat_lon.lat_lon_list = lat_lon.lat_lon_list.apply(
        lambda x: str(round(x[0], 5)) + ';' + str(round(x[1], 5)))
    lat_lon = lat_lon.drop_duplicates().reset_index(drop=True)
    lat_lon.lat_lon_list = lat_lon.lat_lon_list.apply(
        lambda x: [float(i) for i in x.split(';')])
    for i in lat_lon.index:
        lat_lon.lat_lon_list[i].reverse()

    dang_int = pd.merge(dang_int, lat_lon, on='intersection', how='left')

    dang_int.to_csv('app/static/dang_int.csv', index=False)

    dang_int_10 = (
        dang_int[(dang_int['Number of Bike Accidents'] >= 10)
                 & (dang_int['Number of Bike Accidents'] < 15)].reset_index(
                     drop=True))
    dang_int_15 = (
        dang_int[(dang_int['Number of Bike Accidents'] >= 15)
                 & (dang_int['Number of Bike Accidents'] < 20)].reset_index(
                     drop=True))
    dang_int_20 = (
        dang_int[dang_int['Number of Bike Accidents'] >= 20].reset_index(
            drop=True))

    features = [{
        'type': 'Feature',
        'geometry': {
            'type': 'MultiPoint',
            'coordinates': list(loc_df.lat_lon_list),
        },
        'properties': {
            'times': list(loc_df.accident_date),
            'icon': 'circle',
            'iconstyle': {
                'fillColor': 'red',
                'fillOpacity': 0.5,
                'stroke': 'false',
                'radius': 5
            },
            'style': {
                'weight': 0.5
            }
        }
    }]
    """
    *********************************************
    Getting the bike lanes and formatting the data
    *********************************************
    """

    bike_lanes = pd.read_json('./app/static/Bicycle Routes.geojson')

    bl_prot_json = []
    bl_stand_json = []
    for i in bike_lanes.index:
        if bike_lanes.iloc[i].features['properties']['facilitycl'] == 'I':
            for j in range(
                    len(bike_lanes.iloc[i].features['geometry']
                        ['coordinates'])):
                bl_prot_json.append(
                    bike_lanes.iloc[i].features['geometry']['coordinates'][j])
        else:
            for j in range(
                    len(bike_lanes.iloc[i].features['geometry']
                        ['coordinates'])):
                bl_stand_json.append(
                    bike_lanes.iloc[i].features['geometry']['coordinates'][j])

    for i in bl_prot_json:
        for j in i:
            j.reverse()
    for i in bl_stand_json:
        for j in i:
            j.reverse()
    """
    *********************************************
    Creating the map and interactive features
    *********************************************
    """

    nyc_map = folium.Map(location=[40.735, -73.95],
                         zoom_start=11.5,
                         tiles=None)
    folium.TileLayer('cartodbdark_matter', control=False).add_to(nyc_map)

    # Add bike lanes
    folium.PolyLine(bl_prot_json, weight=1, opacity=0.9, color='lime').add_to(
        folium.FeatureGroup(name='Protected Bike Lanes').add_to(nyc_map))

    folium.PolyLine(bl_stand_json, weight=1, opacity=0.9,
                    color='yellow').add_to(
                        folium.FeatureGroup(
                            name='Non-Protected Bike Lanes').add_to(nyc_map))

    # Add citibike routes
    folium.PolyLine(gjson, weight=1, opacity=0.2).add_to(
        folium.FeatureGroup(name='Commonly Used Citibike Routes',
                            overlay=False).add_to(nyc_map))

    # Add Dangerous intersections data
    over10 = folium.FeatureGroup(name='Intersections w/10-14 Accidents',
                                 overlay=False)
    for i in dang_int_10.index:
        over10.add_child(
            folium.Marker(
                dang_int_10.lat_lon_list[i],
                tooltip=(dang_int_10.intersection[i] + ':\t' +
                         str(dang_int_10['Number of Bike Accidents'][i]) +
                         ' Accidents'),
                icon=folium.Icon(color='red',
                                 prefix='fa',
                                 icon='fas fa-bicycle')))
    over15 = folium.FeatureGroup(name='Intersections w/15-19 Accidents',
                                 overlay=False)
    for i in dang_int_15.index:
        over15.add_child(
            folium.Marker(
                dang_int_15.lat_lon_list[i],
                tooltip=(dang_int_15.intersection[i] + ':\t' +
                         str(dang_int_15['Number of Bike Accidents'][i]) +
                         ' Accidents'),
                icon=folium.Icon(color='red',
                                 prefix='fa',
                                 icon='fas fa-bicycle')))
    over20 = folium.FeatureGroup(name='Intersections w/20 or More Accidents',
                                 overlay=False)
    for i in dang_int_20.index:
        over20.add_child(
            folium.Marker(
                dang_int_20.lat_lon_list[i],
                tooltip=(dang_int_20.intersection[i] + ':\t' +
                         str(dang_int_20['Number of Bike Accidents'][i]) +
                         ' Accidents'),
                icon=folium.Icon(color='red',
                                 prefix='fa',
                                 icon='fas fa-bicycle')))

    nyc_map.add_child(over10)
    nyc_map.add_child(over15)
    nyc_map.add_child(over20)

    plugins.TimestampedGeoJson(
        {
            'type': 'FeatureCollection',
            'features': features
        },
        period='P1M',
        add_last_point=True,
        auto_play=True,
        loop=False,
        max_speed=2,
        loop_button=True,
        date_options='YYYY-MM-DD',
        time_slider_drag_update=True,
        duration='P1M').add_to(nyc_map)

    folium.LayerControl().add_to(nyc_map)
    nyc_map.save('app/static/map_nyc.html')
    """
    *********************************************
    Bike crash causes
    *********************************************
    """
    # Decided not to use the below for now.  Could use it in the future...

    bike_list = ['BIKE', 'BICYCLE', 'E-BIK', 'BYCIC']
    cause_df = df[((pd.isna(df.contributing_factor_vehicle_3) == True) &
                   ((df.vehicle_type_code1.isin(bike_list) == True) |
                    (df.vehicle_type_code2.isin(bike_list) == True)))]

    cause_df = cause_df[(cause_df.vehicle_type_code1.isin(bike_list) == False)
                        |
                        (cause_df.vehicle_type_code2.isin(bike_list) == False)]

    def bike_cause(x):
        if x.vehicle_type_code1 in bike_list:
            return x.contributing_factor_vehicle_1
        else:
            return x.contributing_factor_vehicle_2

    def veh_cause(x):
        if x.vehicle_type_code1 not in bike_list:
            return x.contributing_factor_vehicle_1
        else:
            return x.contributing_factor_vehicle_2

    cause_df['bike_cause'] = cause_df.apply(bike_cause, axis=1)
    cause_df['veh_cause'] = cause_df.apply(veh_cause, axis=1)

    # remove Unspecified from dataset. Not useful

    bike_cause_df = (cause_df.groupby(
        'bike_cause').collision_id.count().reset_index().sort_values(
            'collision_id', ascending=False).head(15).reset_index(drop=True))
    bike_cause_df = bike_cause_df[bike_cause_df.bike_cause != 'Unspecified']

    veh_cause_df = (cause_df.groupby(
        'veh_cause').collision_id.count().reset_index().sort_values(
            'collision_id', ascending=False).head(15).reset_index(drop=True))
    veh_cause_df = veh_cause_df[veh_cause_df.veh_cause != 'Unspecified']
Code Example #20
    #filename_csv = (os.getcwd() + "\Accuracy_error_metrics.csv")

    #print(filename)
    print("S3 bucket successfully created")

    print("Model successfully uploaded to S3")
except Exception as e:
    print(e)

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from datetime import datetime as dt, timedelta

default_args = {
    'owner': 'milony',
    'depends_on_past': False,
    'start_date': dt(2018, 4, 12),
    'retries': 1,
    'retry_delay': timedelta(minutes=1)
}

dag = DAG('Assignment_3', default_args=default_args, schedule_interval='@once')

t0 = PythonOperator(
    task_id='performance_metrics',
    python_callable=performance_metrics,
    provide_context=True,
    op_kwargs={'dataset': dataset},
    dag=dag)
Code Example #21
File: stockMain.py  Project: gary258796/stockPython
def add_years(original_time, add_years):
    try:
        return original_time.replace(year=original_time.year + add_years)
    except ValueError:
        return original_time + (dt(original_time.year + add_years, 1, 1) - dt(original_time.year, 1, 1))
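
A quick check of the fallback branch: replace() raises ValueError only when original_time is Feb 29 and the target year is not a leap year, in which case the timedelta arithmetic lands on Mar 1:

from datetime import datetime as dt

print(add_years(dt(2019, 6, 15), 1))  # 2020-06-15 00:00:00
print(add_years(dt(2020, 2, 29), 1))  # 2021-03-01 00:00:00 (no Feb 29 in 2021)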
Code Example #22
                       className='element-table'),
              dcc.Interval(id='table-update',
                           interval=2000,
                           n_intervals=0),
          ]),
 # History graph
 html.Div(id='history-graph-container',
          children=[
              html.Br(),
               html.Label('Select date to display'),
              dcc.DatePickerSingle(
                  id="date-input",
                  display_format="YYYY-M-D",
                  month_format='MMMM Y',
                  placeholder='MMMM Y',
                  date=dt(2018, 5, 24)),
              html.Label('Select hour to display'),
              dcc.Dropdown(id='hour-dropdown',
                           options=[{
                               'label': '00 h.',
                               'value': 0
                           }, {
                               'label': '01 h.',
                               'value': 1
                           }, {
                               'label': '02 h.',
                               'value': 2
                           }, {
                               'label': '03 h.',
                               'value': 3
                           }, {