Beispiel #1
0
def getDay(day, derived_day, month, year):
    """Resolve a day-of-month value.

    A literal day is returned unchanged.  The wildcard '*' resolves to the
    derived day when one is available, otherwise to the last day of the
    given month/year.
    """
    if day != '*':
        return day
    if math.isnan(derived_day):
        # No derived value: fall back to the month's final day.
        return calendar.monthrange(year, month)[1]
    return int(derived_day)
    def reposicionar_centroids(self, centroids: list, matriz: pd.DataFrame):
        """Move each centroid to the mean position of the points assigned to it.

        Args:
            centroids: current [x, y] positions, one entry per centroid.
            matriz: DataFrame with at least the columns ``centroid_id``,
                ``ponto_x`` and ``ponto_y``, where ``centroid_id`` holds
                strings of the form ``'centroid_<i>'``.

        Returns:
            The updated centroid list (also stored on ``self.centroids``).
        """
        print("===================")
        print(f"Centroid Entrada: {centroids}")

        self.centroids: list = centroids
        for _indice in range(0, len(centroids)):
            # Select only the points currently assigned to this centroid.
            filtro = f"centroid_id == 'centroid_{_indice}'"
            sub_conjunto: pd.DataFrame = matriz.query(filtro)
            # Average only the coordinate columns, and only once per group:
            # the original recomputed mean() on every access and averaged
            # the whole frame, including the string column centroid_id,
            # which raises TypeError on recent pandas.
            medias = sub_conjunto[["ponto_x", "ponto_y"]].mean()
            if math.isnan(medias.ponto_x) or math.isnan(medias.ponto_y):
                # Empty cluster: leave the centroid where it is.
                continue
            self.centroids[_indice] = [medias.ponto_x, medias.ponto_y]

        print(f"Centroid Saida:   {self.centroids}")

        return self.centroids
Beispiel #3
0
def getYear(year, derived_year):
    """Resolve a year value.

    The wildcard '*' resolves to the derived year when one is available;
    with no derived year, -1 is returned as a sentinel (the author chose
    this over raising).  Any other input is coerced to int.
    """
    if year != '*':
        return int(year)
    if math.isnan(derived_year):
        # Sentinel instead of ValueError('Can not derive year').
        return -1
    return int(derived_year)
Beispiel #4
0
def getClosureForFolder(v):
    """Derive a closure status for record dict *v*.

    For rows whose 'folder' field is the literal string 'folder', the
    module-level DataFrame ``df`` is searched for matching piece/item rows:
    'open_on_transfer' wins over 'closed_until'; with neither present the
    result is 'unknown_status'.  Any other row keeps its own closure_type.
    """
    if v['folder'] != 'folder':
        return v['closure_type']

    piece = v['piece']
    item = v['item']
    # Shared piece/item mask (the original rebuilt it for each query);
    # item is ignored when it is NaN.
    # NOTE(review): math.isnan assumes item is a float — confirm upstream.
    mask = df['piece'].eq(piece) & (df['item'].eq(item)
                                    if not math.isnan(item) else True)

    if not df.loc[mask & df['closure_type'].str.startswith('open_on_transfer')].empty:
        return 'open_on_transfer'
    # No open_on_transfer rows: check for closed_until or unknown status.
    if not df.loc[mask & df['closure_type'].str.startswith('closed_until')].empty:
        return 'closed_until'
    return 'unknown_status'
Beispiel #5
0
def populateClosureStartDate(v):
    """Return the closure start date (ISO-format string) for a row dict *v*.

    Only rows whose closure_type is 'closed_until' produce a value; all
    others yield None (which suits DataFrame.apply usage).  The row's own
    date_of_birth is used when present; otherwise the latest date_of_birth
    among matching piece/item rows of the module-level ``df`` is borrowed.
    """
    from datetime import datetime
    dateOfBirth = v['date_of_birth']
    piece = v['piece']
    item = v['item']
    closure_type = v['closure_type']
    if closure_type != 'closed_until':
        return None
    if not pd.isnull(dateOfBirth):
        return str(datetime.strftime(dateOfBirth, '%Y-%m-%dT%H:%M:%S'))
    # No date of birth on this row: copy the latest one from the files.
    df1 = df.loc[df['piece'].eq(piece)
                 & (df['item'].eq(item) if not math.isnan(item) else
                    True)]['date_of_birth'].copy()
    # BUGFIX: Series.sort() was removed from pandas (0.20+); sort_values
    # is the supported replacement.
    df1 = df1.sort_values(ascending=False)
    latestDate = df1.iloc[0]
    return datetime.strftime(latestDate, '%Y-%m-%dT%H:%M:%S')
Beispiel #6
0
def angle_between(v1, v2):
    """Return the angle in degrees between vectors 'v1' and 'v2'::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            90.0
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            180.0

    The previous docstring claimed radians, but the implementation has
    always converted with math.degrees before returning.
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    angle = np.arccos(np.dot(v1_u, v2_u))
    if math.isnan(angle):
        # arccos can produce NaN when rounding pushes the dot product
        # outside [-1, 1]; map to the exact parallel/anti-parallel answers.
        if (v1_u == v2_u).all():
            return 0.0
        return 180.0
    return math.degrees(angle)
def main():
    """Run the BOA experiment args.iteration times and persist the results."""
    args = parse_args()

    # Build the DEAP fitness/particle types; minimise or maximise per flag.
    creator.create("Maximum", base.Fitness, weights=(1.0, ))
    creator.create("Minimum", base.Fitness, weights=(-1.0, ))
    fitness = creator.Minimum if args.minimum else creator.Maximum
    creator.create("Particle",
                   list,
                   exemplar=list,
                   best=None,
                   no_improvement_counter=0,
                   fitness=fitness,
                   fragrance=float)

    results_dir = "../results/" + args.logCatalog + "/"
    best_histories = []
    epochs = []
    accuracies = []
    best_values = []
    for _ in range(args.iteration):
        # result: (epoch, best_value, accuracy, best_history, fitness_history)
        result = run_boa(args)
        best_values.append(result[1])
        save_fitness_history(results_dir, result[4])
        best_histories.append(result[3])
        accuracy = result[2]
        accuracies.append(accuracy)
        if accuracy <= args.accuracy:
            epochs.append(result[0])

    save_best_fitness_history(results_dir, best_histories)
    # Drop runs that never produced a finite fitness value.
    filtered_best_values = [
        value for value in best_values if math.isfinite(value)
    ]
    display_and_save_results(epochs, filtered_best_values, accuracies,
                             args.accuracy, results_dir)
            dict_data['fsa:CashAndCashEquivalents'] = datalisten[i, dataslut]
            dict_data['fsa:CashAndCashEquivalents_prev'] = datalisten[
                i, dataslut - 1]
        if datalisten[i, 1] == 20000:
            dict_data['fsa:Equity'] = datalisten[i, dataslut]
            dict_data['fsa:Equity_prev'] = datalisten[i, dataslut - 1]
        if datalisten[i, 1] == 49500:
            dict_data['fsa:ProfitLoss'] = datalisten[i, dataslut]
            dict_data['fsa:ProfitLoss_prev'] = datalisten[i, dataslut - 1]
        if datalisten[i, 1] == 49100:
            dict_data[
                'fsa:ProfitLossFromOrdinaryOperatingActivities'] = datalisten[
                    i, dataslut]
            dict_data[
                'fsa:ProfitLossFromOrdinaryOperatingActivities_prev'] = datalisten[
                    i, dataslut - 1]

    slettes = []
    for noegle in dict_data.keys():
        if math.isnan(dict_data[noegle]):
            slettes.append(noegle)
    for noegle in slettes:
        del dict_data[noegle]

    transform_spain = spain_to_dict()
    transformed_data = transform_spain.transform([dict_data])
    output.append(transformed_data[0])

    print(clf_EW_spain.predict([dict_data]),
          clf_EW_spain.predict_proba([dict_data]))
Beispiel #9
0
def sectionSpeed(numOfHours,numOfDays,trainData,firstTime,n,minLon, lonLen, minLat, latLen):
    '''Compute the average speed per map section and hour slot.

    The map is divided into an n*n grid; a section id is the flattened
    one-dimensional grid index.  Returns res[sectionId][hourSlot], in the
    units produced by calDistance() per second.
    '''
    # Fallback speed for sections/slots that have no observations.
    defaultVel = 5.0

    # speed[sectionId][hourSlot][dayIndex] collects velocity samples.
    # BUGFIX: the original seeded each cell with the float 0.0 and later
    # called .append() on it (AttributeError); each cell must start as an
    # empty list of samples.
    speed = []
    for _ in range(n * n):
        perHour = []
        speed.append(perHour)
        for _ in range(numOfHours):
            perDay = []
            perHour.append(perDay)
            for _ in range(numOfDays):
                perDay.append([])

    for file in trainData:
        # BUGFIX: numpy.str was removed in NumPy >= 1.24; plain str is the
        # documented replacement.
        df = pandas.read_csv(file,
                             header=None,
                             names=["taxiId", "lat", "lon", "busy", "time"],
                             dtype={"taxiId": numpy.int16,
                                    "lat": numpy.double,
                                    "lon": numpy.double,
                                    "busy": numpy.int8,
                                    "time": str})
        # Sort by taxi id then timestamp so consecutive rows form a track.
        df.sort_values(by=["taxiId", "time"], axis=0,
                       ascending=[True, True], inplace=True)

        taxiId1 = -1
        lat1 = 0
        lon1 = 0
        sectionId1 = 0
        time1 = firstTime
        for row in df.itertuples(index=False):
            taxiId2 = row[0]
            lat2 = row[1]
            lon2 = row[2]
            time2 = datetime.datetime.strptime(row[4], "%Y/%m/%d %H:%M:%S")
            if taxiId1 == taxiId2 and time1.hour == time2.hour:
                elapsed = (time2 - time1).seconds
                # Skip duplicate timestamps to avoid dividing by zero.
                if elapsed > 0:
                    v = calDistance(lon1, lat1, lon2, lat2) / elapsed
                    # The sample is attributed to the section/slot of the
                    # *previous* point, as in the original.
                    speed[sectionId1][time1.hour - firstTime.hour][(time1 - firstTime).days].append(v)
            taxiId1 = taxiId2
            lat1 = lat2
            lon1 = lon2
            time1 = time2
            # Section of the current point, used when pairing with the next row.
            sectionId1 = calSectionId(lon1, lat1, minLon, lonLen, minLat, latLen, n)

    # Reduce samples to res[sectionId][hourSlot]: the mean over days of
    # the per-day means; a slot whose first day has no data falls back to
    # defaultVel (numpy.mean([]) yields NaN, as before).
    res = []
    for i in range(n * n):
        perHourMeans = []
        res.append(perHourMeans)
        for j in range(numOfHours):
            dailyMeans = [numpy.mean(speed[i][j][k]) for k in range(numOfDays)]
            if math.isnan(dailyMeans[0]):
                perHourMeans.append(defaultVel)
            else:
                perHourMeans.append(numpy.mean(dailyMeans))

    return res
def is_nan(x):
    """Return True when *x* is a float NaN; False for any other value."""
    if not isinstance(x, float):
        return False
    return math.isnan(x)
Beispiel #11
0
import json
from numpy import math

try:
    litindex_json_files = glob.glob("./rawdata/*.json")
    conn = psycopg2.connect(
        "dbname='litindex' user='******' host='0.0.0.0' password='******'")
    cur = conn.cursor()

    for onefile in litindex_json_files:
        json_records = pd.read_json(onefile, lines=True)
        for index, row in json_records.iterrows():
            print(row['id'])
            # print(row['institution_id'])
            rowId = row['id']
            if math.isnan(rowId):
                rowId = 0
                # print("NAN row_id =",rowId)
            rowInstitutionId = row['institution_id']
            if math.isnan(rowInstitutionId):
                rowInstitutionId = 0.0
                # print("NAN rowInstitutionId =",rowInstitutionId)
            year = row['year']
            if math.isnan(year):
                year = 0
                # print("NAN year =",year)
            cur.execute(
                "INSERT INTO open_syllabi(id, source_url, source_anchor, syllabus_probability, year, field_name, institution_id, grid_name, grid_country_code, text_md5, text) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                (
                    rowId,
                    row['source_url'],
Beispiel #12
0
        ('fsa:ProfitLoss_prev', 92),
        ('fsa:ProfitLoss', 93),
    ):
        vaerdi = datalist[virksomhed][col]
        dict_input_poland[tekst] = vaerdi
    dict_input_poland['fsa:Assets'] = np.nan_to_num(
        dict_input_poland['fsa:NoncurrentAssets']) + np.nan_to_num(
            dict_input_poland['fsa:CurrentAssets']) + np.nan_to_num(
                dict_input_poland['fsa:Prepayments'])
    dict_input_poland['fsa:Assets_prev'] = np.nan_to_num(
        dict_input_poland['fsa:NoncurrentAssets_prev']) + np.nan_to_num(
            dict_input_poland['fsa:CurrentAssets_prev']) + np.nan_to_num(
                dict_input_poland['fsa:Prepayments_prev'])
    slettes = []
    for noegle in dict_input_poland.keys():
        if math.isnan(dict_input_poland[noegle]):
            slettes.append(noegle)
    for noegle in slettes:
        del dict_input_poland[noegle]

    transform_poland = Polish_to_dict()
    transformed_data = transform_poland.transform([dict_input_poland])
    output.append(transformed_data[0])

    #print(dict_input_poland)
    print(virksomhed, clf_EW_poland.predict_proba([dict_input_poland]))

#df_out = pd.DataFrame.from_dict(output)
#
#import matplotlib.pyplot as plt
##print(df_out.columns)