Beispiel #1
0
 def compute_outputs(self):
     """Copy the node totals into ``self.outputs`` and binarise them by rank.

     For every index of ``self.nodes`` the matching entry of ``self.totals``
     is stored in ``self.outputs``.  The outputs are then passed to the
     external ``sort`` helper, whose result is walked in order: each element
     is used as an index into ``self.outputs``; the first
     ``int(len(result) * self.psize)`` of them are set to 1 and all the
     remaining ones to 0.

     NOTE(review): this presumes ``sort`` returns node indices ordered by
     their output value - confirm against the helper's definition.
     """
     for node_idx in range(len(self.nodes)):
         self.outputs[node_idx] = self.totals[node_idx]

     ranking = sort(self.outputs)
     cutoff = int(len(ranking) * self.psize)

     for rank, node_idx in enumerate(ranking):
         # Entries ranked before the cutoff are switched on, the rest off.
         self.outputs[node_idx] = 1 if rank < cutoff else 0
def home():
    """Dispatch a JSON request to the matching helper in ``functions``.

    The request body is read as JSON and its ``command`` entry selects the
    operation:

    * ``'ping'``   -> ``functions.ping_pong()``
    * ``'sample'`` -> ``functions.sample(size, begin, end)`` using the
      ``size``, ``begin`` and ``end`` entries of the body
    * ``'sort'``   -> ``functions.sort(body)``

    Any other command yields an empty dict.  The result is logged, its
    JSON response is printed, and a JSON response is returned.
    """
    payload = request.get_json()
    #logging.info("Request received: {}".format(payload))
    action = payload['command']

    if action == 'ping':
        outcome = functions.ping_pong()
    elif action == 'sample':
        outcome = functions.sample(payload['size'], payload['begin'],
                                   payload['end'])
    elif action == 'sort':
        outcome = functions.sort(payload)
    else:
        # Unknown commands fall through with an empty result.
        outcome = {}

    logging.info("Response: {}".format(outcome))
    print(jsonify(outcome))
    return jsonify(outcome)
Beispiel #3
0
        # Tail of a loop whose header is outside this excerpt: parent ``i``
        # starts as the sequence 0..n_cities-1 and is then shuffled in place.
        parents[i] = range(n_cities)

        np.random.shuffle(parents[i])
    best = []  # first path length of every simulation (see end of the loop)
    strd = 0  # reserved for a standard-error measurement; not used below
    means = []  # mean path length of every simulation

    # Main loop: n_sims simulations of `generations` generations each.
    for j in range(n_sims):
        for gen in range(generations):

            # Produce offspring from the current parents.
            offspring = partially_mapped(parents, distances, n_cities, N_pop)

            # Combine offspring and parents into one candidate population.
            new_population = np.append([offspring], [parents], axis=1)

            # NOTE(review): presumably returns the path lengths and the
            # population ordered best-first - confirm against `sort`.
            path_lengths, population = sort(new_population, distances)

            removables = len(path_lengths) - N_pop

            # Drop trailing path lengths until only N_pop remain.
            for i in range(removables):
                path_lengths = np.delete(path_lengths, len(path_lengths) - 1)
            # The first N_pop individuals become the next generation's parents.
            parents = []
            for i in range(N_pop):
                parents.append(population[i])

        # After the last generation, run hillclimb2 on every parent and store
        # the returned individual and path length.
        for populant in range(N_pop):
            parents[populant], path_lengths[populant] = hillclimb2(
                parents[populant], distances)

        means.append(np.mean(path_lengths))
        best.append(path_lengths[0])  # kept for the standard-deviation statistics
Beispiel #4
0
def run_functions(curr_spreadsheet, query_string, query_parameters):
    """Execute a dot-separated command string against a spreadsheet.

    ``query_string`` is split on ``"."`` into commands; each command is split
    on single spaces into a command name followed by placeholder tokens
    (for example ``ADD <ENTRY_1> <ENTRY_2>``).  Every placeholder is looked
    up in ``query_parameters`` to obtain the actual argument objects, the
    matching helper in ``functions`` is called on ``curr_spreadsheet.sheet``,
    and the helper's return value is stored back into ``query_parameters``
    under ``"<RES_k>"`` where ``k`` is the 1-based position of the command.
    Unrecognised commands (and ``SORT`` on a non-column entry) store ``None``.

    Args:
        curr_spreadsheet: object exposing the target sheet as ``.sheet``.
        query_string: the command string to run.
        query_parameters: dict mapping placeholder tokens to argument
            objects; it is mutated to receive the ``<RES_k>`` results.

    Raises:
        KeyError: if a placeholder is missing from ``query_parameters``.
    """
    print("Query string: " + query_string)
    sheet = curr_spreadsheet.sheet

    for position, raw_command in enumerate(query_string.split("."), start=1):
        command_name, *placeholders = raw_command.split(" ")
        args = [query_parameters[token] for token in placeholders]
        result = None

        if command_name == "ADD":
            result = functions.add(sheet, args[0], args[1])

        elif command_name == "INSERTAFT":
            # Insert after the entry: next column letter or next row number.
            if args[0].rowOrCol == 'col':
                result = functions.insert_entry(
                    sheet, chr(ord(args[0].value) + 1), args[0].rowOrCol)
            else:
                result = functions.insert_entry(
                    sheet, int(args[0].value) + 1, args[0].rowOrCol)

        elif command_name == "INSERTBEF":
            if args[0].rowOrCol == 'col':
                result = functions.insert_entry(
                    sheet, chr(ord(args[0].value)), args[0].rowOrCol)
            else:
                result = functions.insert_entry(
                    sheet, int(args[0].value), args[0].rowOrCol)

        elif command_name == "SET":
            # A plain float is written as-is; otherwise use the object's value.
            if isinstance(args[0], float):
                result = functions.cell_update(sheet, args[1].cell_str, args[0])
            else:
                result = functions.cell_update(
                    sheet, args[1].cell_str, args[0].value)

        elif command_name == "AVG":
            result = functions.average_entry(
                sheet, args[0].value, args[0].rowOrCol)

        elif command_name == "BOLD":
            if isinstance(args[0], Cell):
                # A single cell is expressed as the range "X:X".
                result = functions.format_bold(
                    sheet, str(args[0].cell_str) + ":" + str(args[0].cell_str))
            else:
                result = functions.format_bold_entry(sheet, args[0])

        elif command_name == "SET_BG":
            if len(args) != 2:
                # Colour plus two cells: paint the range between them.
                result = functions.set_background(
                    sheet,
                    str(args[1].cell_str) + ":" + str(args[2].cell_str),
                    args[0].color_str)
            elif isinstance(args[1], Cell):
                result = functions.set_background(
                    sheet, args[1].cell_str + ":" + args[1].cell_str,
                    args[0].color_str)
            else:
                result = functions.set_background_entry(
                    sheet, args[1], args[0].color_str)

        elif command_name == "MULTIPLY":
            if isinstance(args[1], Entry):
                result = functions.multiply_entry(
                    sheet, int(args[0].value), args[1].value,
                    args[1].rowOrCol == "row")
            else:
                result = functions.multiply_cell(
                    sheet, int(args[0].value), args[1].cell_str)

        elif command_name == "SIN":
            if len(args) != 1:
                result = functions.sin_range(
                    sheet, str(args[0].cell_str) + ":" + str(args[1].cell_str))
            elif isinstance(args[0], Cell):
                result = functions.sin_cell(sheet, args[0].cell_str)
            else:
                # Original author's note: "Don't know if this works".
                result = functions.sin_entry(
                    sheet, ord(args[0].value) - ord("A") + 1,
                    args[0].rowOrCol == "col")

        elif command_name == "COS":
            if len(args) != 1:
                result = functions.cos_range(
                    sheet, str(args[0].cell_str) + ":" + str(args[1].cell_str))
            elif isinstance(args[0], Cell):
                result = functions.cos_cell(sheet, args[0].cell_str)
            else:
                result = functions.cos_entry(
                    sheet, args[0].value, args[0].rowOrCol == "col")

        elif command_name == "SORT":
            # Only column entries are sorted; the column letter is converted
            # to a 1-based index and sorted ascending.
            if args[0].rowOrCol == 'col':
                result = functions.sort(
                    sheet, (ord(args[0].value) - ord("A") + 1, 'asc'))

        elif command_name == "FILTER_PRIME":
            result = functions.filter_by_prime(sheet)

        # FILTER_EVEN, FILTER_ODD, MAX_VAL and NORMALIZE are not wired up.

        query_parameters["<RES_" + str(position) + ">"] = result
Beispiel #5
0
# Let the single grid cell of `buttons` absorb any extra space.
buttons.grid_columnconfigure(0, weight=1)
buttons.grid_rowconfigure(0, weight=1)

# Treeview: a frame that hosts the table and its two scroll bars.
container = ttk.Frame()
container.pack(fill='both', expand=True)

# Column identifiers; the same strings are used as the heading texts.
treeColumns = ("Item", "ID", "Price", "Available", "Checked out",
               "Description")
tree = ttk.Treeview(columns=treeColumns, show="headings")

# Clicking a heading calls functions.sort(tree, <column>, 0).  The column is
# bound through the lambda's default argument so that each heading keeps its
# own column name instead of the loop's last value.
for column in treeColumns:
    tree.heading(column,
                 text=column,
                 command=lambda c=column: functions.sort(tree, c, 0))

# Give every column the same initial and minimum width.
for i in treeColumns:
    tree.column(i, width=130, minwidth=30)

vsb = ttk.Scrollbar(orient="vertical",
                    command=tree.yview)  # Vertical scroll bar
hsb = ttk.Scrollbar(orient="horizontal",
                    command=tree.xview)  # Horizontal scroll bar

# Link the tree's scroll position back to both scroll bars.
tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

# Grid layout inside `container`: tree at (row 0, col 0), vertical bar to its
# right, horizontal bar underneath.
vsb.grid(column=1, row=0, sticky='ns', in_=container)
hsb.grid(column=0, row=1, sticky='ew', in_=container)
tree.grid(column=0, row=0, sticky='nsew', in_=container)
Beispiel #6
0
from math import sqrt

# Number of repetitions per sample size.
N = 1000
chars = ['avr', 'med', 'zr', 'zq', 'ztr']
# Sample sizes to evaluate.
arr = [10, 100, 1000]
print("Лабораторная работа №2; Выполнила: Фомина Дарья\n")
norm = dst.Normal(1, 0)
print("/************************************************/\n")
print("Normal distribution\n")
for n in arr:
    print(f'n={n}')
    # res[k] = [running mean of statistic k, running mean of its square],
    # with k following the order avr, med, zr, zq, ztr.
    res = {slot: [0, 0] for slot in range(5)}
    for i in range(N):
        # NOTE(review): presumably draws a sample of size n from `norm` and
        # returns it sorted - confirm against f.sort.
        s = f.sort(norm, n)
        for slot, statistic in enumerate((f.avr, f.med, f.zr, f.zq, f.ztr)):
            z = statistic(s)
            res[slot][0] += z / N
            res[slot][1] += z * z / N
Beispiel #7
0
    userInput = input("Choose a index: ")

    index = int(userInput)

    # Only accept an index that is a known header value, is not flagged as
    # non-numeric and is not 0.  The zero check uses `!=`: comparing an int
    # with `is not` tests object identity, not value.
    if (index in headers.values() and index not in nonNumericIndexes
            and index != 0):
        # Get key by value: find the header name that maps to this index.
        for key, value in headers.items():
            if value == index:
                header = key

        # Sort and show all data, functions in external file 'functions.py'.
        # A copy is taken before sorting so the statistics below can use the
        # values in their original order.
        # NOTE(review): presumably functions.sort works in place - confirm.
        dataset = copy.copy(data[index])
        print(dataset)
        sortedData = functions.sort(data[index])
        print(header)
        print(f"Average:            \n{functions.average(dataset)}")
        _modus = functions.modus(dataset)
        print(f"Modus:              \n{_modus[0]}: {_modus[1]} time(s)")
        print(f"Median:             \n{functions.median(sortedData)}")
        print(f"Standard deviaton:  \n{functions.standardDeviation(dataset)}")

        # Show histogram with confidence interval
        fig, ax = plt.subplots()

        # Split the value range into 40 equally wide bins.
        binwidth = (max(dataset) - min(dataset)) / 40

        plt.hist(dataset,
                 bins=np.arange(min(dataset),
                                max(dataset) + binwidth, binwidth))
Beispiel #8
0
def launch_functions_menu():
    """Run the interactive console menu of the "functions" exercises.

    The list of options is printed, a number is read from standard input and
    the matching demonstration from the ``functions`` module is executed.
    This repeats until option 13 ("exit") is entered.  Numbers that match no
    option simply show the menu again.

    Raises:
        ValueError: if the text typed at a prompt is not an integer.
    """
    print("Functions Section - Enter the exercise number you want to run")
    exit_option = 13
    choice = 0

    while choice != exit_option:
        for exercise_no in range(1, exit_option):
            print("exercise #{}".format(exercise_no))
        print("exit #13")

        choice = int(input("Insert Selection = "))

        if choice == 1:
            candidate = int(input("Enter a number to see if it's prime "))
            print(functions.is_prime(candidate))
        elif choice == 2:
            limit = int(input("Get all prime numbers smaller than "))
            functions.prime_numbers_smaller_than(limit)
        elif choice == 3:
            count = int(input("Fibonacci sequence with elements count = "))
            functions.fibonacci_sequence(count)
        elif choice == 4:
            print("my_map function")
            functions.exercise_4()
        elif choice == 5:
            print("my_filter function")
            functions.exercise_5()
        elif choice == 6:
            print("my_reduce function")
            functions.exercise_6()
        elif choice == 7:
            limit = int(input("Special Sum of numbers smaller than "))
            print(functions.sum_of_special_numbers_smaller_than(limit))
        elif choice == 8:
            ordered = functions.sort([4, 2, 3, 1])
            print("sort [4, 2, 3, 1] {0}".format(ordered))
        elif choice == 9:
            print("Check if 1 in list [1, 4, 5] ")
            print(functions.element_in_list(1, [1, 4, 5]))
        elif choice == 10:
            # Three calls without an argument, then two with a fresh State
            # object, then one more call without an argument.
            print("call stateful function")
            for _ in range(3):
                print(functions.stateful_func())

            print("----- new state -----")

            fresh_state = functions.State()
            for _ in range(2):
                print(functions.stateful_func(fresh_state))

            print("----- old state -----")
            print(functions.stateful_func())
        elif choice == 11:
            print("measure the duration of a function")
            print(functions.my_func())
        elif choice == 12:
            print("Total execution time for multiple functions")
            for timed in (functions.my_func, functions.my_func2):
                print("----- Total Execution Time -------")
                print(timed())
        elif choice == exit_option:
            print("Exit")
def action_statistics(action_path, outputpath):
    """Summarise the raw user-action log into one statistics row per user.

    The comma-separated file at ``action_path`` is loaded with pandas and its
    ``userid``, ``actionType`` and ``actionTime`` columns are read.  For each
    distinct user the rows are ordered by the external ``sort`` helper
    (called with ``["actionType", "actionTime"], ascending=False``) and the
    following values are derived:

    * ``stepKN`` / ``stepKP`` - number of actions of type K (1..9) and their
      share of all counted actions.  Each action is counted exactly once.
    * ``lastKtime`` - ``actionTime`` of the first row of type K in the sorted
      order, 0 if the type never occurs.
    * ``tail1..4`` / ``tailtime1..4`` - type and time of the first four
      sorted rows; ``head1`` - type of the last sorted row.
    * ``maxtime``, ``mintime``, ``averagetime``, ``vartime``, ``mediantime`` -
      statistics of the differences between consecutive ``actionTime``
      values; ``tail3ave`` / ``tail3var`` - the same for the last three
      differences.  All stay 0 for a user with fewer than two rows.
    * ``viewProductN``, ``buy/viewProductN``, ``buy/n5``, ``(2-4)/(1)`` -
      funnel ratios built from the per-type counts.

    The resulting table is written to ``outputpath`` as CSV.

    Args:
        action_path: path of the comma-separated action log.
        outputpath: destination path of the CSV that is written.

    Returns:
        None.
    """
    cm_data_raw = pd.read_table(action_path, sep=',', encoding='utf-8')

    newCust = pd.DataFrame(columns=[
        "userid", 'totalstep', 'step1N', 'step2N', 'step3N', 'step4N',
        'step5N', 'step6N', 'step7N', 'step8N', 'step9N', 'step1P', 'step2P',
        'step3P', 'step4P', 'step5P', 'step6P', 'step7P', 'step8P', 'step9P',
        'viewProductN', 'last1time', 'last2time', 'last3time', 'last4time',
        'last5time', 'last6time', 'last7time', 'last8time', 'last9time',
        'buy/viewProductN', 'buy/n5', '(2-4)/(1)', 'tail1', 'tail2', 'tail3',
        'tail4', 'head1', 'maxtime', 'mintime', 'averagetime', 'vartime',
        'mediantime', 'tailtime1', 'tailtime2', 'tailtime3', 'tailtime4',
        'tail3ave', 'tail3var'
    ])
    # Users that already have a row; a set keeps the membership test cheap.
    seen_users = set()

    for i in range(0, len(cm_data_raw)):
        uid = cm_data_raw.iloc[i]['userid']
        if uid in seen_users:
            continue
        seen_users.add(uid)

        udata = cm_data_raw[cm_data_raw['userid'] == uid]
        udata = sort(udata, ["actionType", "actionTime"], ascending=False)

        # Index k (1..9) holds the data of action type k; index 0 is unused.
        counts = [0] * 10       # number of actions of each type
        first_time = [0] * 10   # actionTime of the first sorted row per type
        hit = [False] * 10      # whether first_time[k] has been recorded
        # Type and time of the first four sorted rows.
        tail_types = [0] * 4
        tail_times = [0] * 4
        # Type of the last sorted row.
        head1 = 0
        # Differences between consecutive actionTime values.
        timeslot = []
        ptime = 0

        # Walk through this user's actions.
        for j in range(0, len(udata)):
            oudata = udata.iloc[j]
            atype = oudata['actionType']
            atime = oudata['actionTime']

            if j < 4:
                tail_types[j] = atype
                tail_times[j] = atime

            # Count the action under its type, once.
            for step in range(1, 10):
                if atype == step:
                    counts[step] += 1
                    if not hit[step]:
                        hit[step] = True
                        first_time[step] = atime
                    break

            head1 = atype

            # Time gap to the previous row (none for the very first row).
            if j != 0:
                timeslot.append(atime - ptime)
            ptime = atime

        # Interval statistics; they only depend on the complete list, so they
        # are computed once after the loop.
        maxtime = mintime = averagetime = vartime = mediantime = 0
        tail3ave = tail3var = 0
        if len(timeslot) != 0:
            maxtime = np.max(timeslot)
            mintime = np.min(timeslot)
            averagetime = np.average(timeslot)
            mediantime = np.median(timeslot)
            vartime = np.var(timeslot)
            tail3a = timeslot[-3:]
            tail3ave = np.average(tail3a)
            tail3var = np.var(tail3a)

        n1, n2, n3, n4, n5, n6, n7, n8, n9 = counts[1:]
        view_n = n2 + n3 + n4

        # Type-9 actions relative to type 2-4 actions.  Without any type 2-4
        # action the ratio is defined as 1 if type 9 occurred, else 0.
        if view_n != 0:
            buy_viewProductN = n9 / view_n
        elif n9 == 0:
            buy_viewProductN = 0
        else:
            buy_viewProductN = 1

        buy_5 = n9 / n5 if n5 != 0 else 0
        p24_1 = view_n / n1 if n1 != 0 else 0

        totalStep = n1 + n2 + n3 + n4 + n5 + n6 + n7 + n8 + n9
        if totalStep != 0:
            shares = [count / totalStep for count in counts]
        else:
            # No action with a type in 1..9: report zero shares instead of
            # failing with a division by zero.
            shares = [0] * 10

        # NOTE(review): the key 'totalStep' does not match the declared
        # column 'totalstep', so the value lands in an extra column.  Left
        # unchanged because readers of the CSV may rely on the current names.
        finalud = {
            "userid": uid,
            'totalStep': totalStep,
            'step1N': n1,
            'step2N': n2,
            'step3N': n3,
            'step4N': n4,
            'step5N': n5,
            'step6N': n6,
            'step7N': n7,
            'step8N': n8,
            'step9N': n9,
            'step1P': shares[1],
            'step2P': shares[2],
            'step3P': shares[3],
            'step4P': shares[4],
            'step5P': shares[5],
            'step6P': shares[6],
            'step7P': shares[7],
            'step8P': shares[8],
            'step9P': shares[9],
            'viewProductN': view_n,
            'last1time': first_time[1],
            'last2time': first_time[2],
            'last3time': first_time[3],
            'last4time': first_time[4],
            'last5time': first_time[5],
            'last6time': first_time[6],
            'last7time': first_time[7],
            'last8time': first_time[8],
            'last9time': first_time[9],
            'buy/viewProductN': buy_viewProductN,
            'buy/n5': buy_5,
            '(2-4)/(1)': p24_1,
            'tail1': tail_types[0],
            'tail2': tail_types[1],
            'tail3': tail_types[2],
            'tail4': tail_types[3],
            'head1': head1,
            'maxtime': maxtime,
            'mintime': mintime,
            'averagetime': averagetime,
            'vartime': vartime,
            'mediantime': mediantime,
            'tailtime1': tail_times[0],
            'tailtime2': tail_times[1],
            'tailtime3': tail_times[2],
            'tailtime4': tail_times[3],
            'tail3ave': tail3ave,
            'tail3var': tail3var
        }

        # NOTE(review): DataFrame.append was removed in pandas 2.0; kept so
        # the written CSV stays exactly as before on the pandas version this
        # file targets.
        newCust = newCust.append(finalud, ignore_index=True)

    newCust.to_csv(outputpath)

    return
Beispiel #10
0
def action_statistics(action_path, outputpath):
    """Summarise the raw user-action log into one funnel row per user.

    The comma-separated file at ``action_path`` is loaded with pandas and its
    ``userid``, ``actionType`` and ``actionTime`` columns are read.  For each
    distinct user the rows are ordered by the external ``sort`` helper
    (called with ``["actionType", "actionTime"], ascending=False``) and the
    following values are derived:

    * ``stepKN`` - count for action type K (1..9).  Each action is counted
      exactly once.  After every row, steps 8, 7, 6 and 5 are raised to the
      count of the following step when they are smaller.
    * ``lastKtime`` - ``actionTime`` of the first row of type K in the sorted
      order (0 if never seen); a back-filled step also takes over the time of
      the following step.
    * ``viewProductN``, ``buy/viewProductN``, ``buy/n5``, ``(9)/(1)``,
      ``(2-4)/(1)`` - ratios built from the final counts.

    The resulting table is written to ``outputpath`` as CSV.

    Args:
        action_path: path of the comma-separated action log.
        outputpath: destination path of the CSV that is written.

    Returns:
        None.
    """
    cm_data_raw = pd.read_table(action_path, sep=',', encoding='utf-8')

    newCust = pd.DataFrame(columns=[
        "userid", 'step1N', 'step2N', 'step3N', 'step4N',
        'step5N', 'step6N', 'step7N', 'step8N',
        'step9N', 'viewProductN', 'last1time', 'last2time', 'last3time',
        'last4time', 'last5time', 'last6time', 'last7time', 'last8time',
        'last9time', 'buy/viewProductN',
        'buy/n5', '(9)/(1)', '(2-4)/(1)'
    ])
    # Users that already have a row; a set keeps the membership test cheap.
    seen_users = set()

    for i in range(0, len(cm_data_raw)):
        uid = cm_data_raw.iloc[i]['userid']
        if uid in seen_users:
            continue
        seen_users.add(uid)

        udata = cm_data_raw[cm_data_raw['userid'] == uid]
        udata = sort(udata, ["actionType", "actionTime"], ascending=False)

        # Index k (1..9) holds the data of action type k; index 0 is unused.
        counts = [0] * 10       # count per step
        first_time = [0] * 10   # actionTime recorded per step
        hit = [False] * 10      # whether a row of that type was seen yet

        # Walk through this user's actions.
        for j in range(0, len(udata)):
            oudata = udata.iloc[j]
            atype = oudata['actionType']

            # Count the action under its type, once.
            for step in range(1, 10):
                if atype == step:
                    counts[step] += 1
                    if not hit[step]:
                        hit[step] = True
                        first_time[step] = oudata['actionTime']
                    break

            # The data has gaps: when one of the steps 5-8 has fewer hits
            # than the step after it, take over that step's count and time.
            # This runs after every row, from step 8 down to step 5, so the
            # adjusted counts feed into the rows that follow.
            for step in (8, 7, 6, 5):
                if counts[step] < counts[step + 1]:
                    counts[step] = counts[step + 1]
                    first_time[step] = first_time[step + 1]

        # The ratios depend only on the final counts.  Computing them here
        # also gives a user without rows well-defined zero values instead of
        # the previous user's ratios.
        n1, n2, n3, n4, n5, n6, n7, n8, n9 = counts[1:]
        view_n = n2 + n3 + n4

        # Type-9 actions relative to type 2-4 actions.  Without any type 2-4
        # action the ratio is defined as 1 if type 9 occurred, else 0.
        if view_n != 0:
            buy_viewProductN = n9 / view_n
        elif n9 == 0:
            buy_viewProductN = 0
        else:
            buy_viewProductN = 1

        buy_5 = n9 / n5 if n5 != 0 else 0

        if n1 != 0:
            p9_1 = n9 / n1
            p24_1 = view_n / n1
        else:
            p9_1 = 0
            p24_1 = 0

        finalud = {
            "userid": uid,
            'step1N': n1,
            'step2N': n2,
            'step3N': n3,
            'step4N': n4,
            'step5N': n5,
            'step6N': n6,
            'step7N': n7,
            'step8N': n8,
            'step9N': n9,
            'viewProductN': view_n,
            'last1time': first_time[1],
            'last2time': first_time[2],
            'last3time': first_time[3],
            'last4time': first_time[4],
            'last5time': first_time[5],
            'last6time': first_time[6],
            'last7time': first_time[7],
            'last8time': first_time[8],
            'last9time': first_time[9],
            'buy/viewProductN': buy_viewProductN,
            'buy/n5': buy_5,
            '(9)/(1)': p9_1,
            '(2-4)/(1)': p24_1
        }

        # NOTE(review): DataFrame.append was removed in pandas 2.0; kept so
        # the written CSV stays exactly as before on the pandas version this
        # file targets.
        newCust = newCust.append(finalud, ignore_index=True)

    newCust.to_csv(outputpath)

    return
def up_oh_statistics(up_path, oh_path, outputpath):
    """Aggregate the order history per user and join it with the user profile.

    ``datatonumber(up_path, oh_path)`` supplies the order-history table
    (element 0) and the user-profile table (element 1).  For every distinct
    ``userid`` in the order history, its rows are ordered by the external
    ``sort`` helper (called with ``["orderTime", "orderType"],
    ascending=False``) and summarised:

    * ``nearestOrderTime`` / ``nearestCity`` / ``nearestCountry`` /
      ``nearestContinent`` - values of the first sorted row;
    * ``totalOrder`` - number of distinct ``orderTime`` values (rows sharing
      an order time count as one order);
    * ``numberOftype0ne`` / ``typeOnePer`` - orders with ``orderType == 1``
      and their share of ``totalOrder``;
    * ``numberofCity`` / ``numberofCountry`` - distinct cities / countries;
    * ``NumberofU``, ``NumberofNA``, ``NumberofA``, ``NumberofO``,
      ``NumberofAF``, ``NumberofSA`` - orders per continent code.

    The summary is outer-merged with the user profile on ``userid``; the
    columns ``genderN``, ``provinceN``, ``nearestCity``, ``nearestCountry``
    and ``nearestContinent`` are replaced by their ``pd.get_dummies``
    encoding, and the result is written to ``outputpath`` as CSV.

    Args:
        up_path: path of the user-profile data, passed to ``datatonumber``.
        oh_path: path of the order-history data, passed to ``datatonumber``.
        outputpath: destination path of the CSV that is written.

    Returns:
        None.
    """
    up_op_data = datatonumber(up_path, oh_path)
    up_data = up_op_data[1]
    oh_data = up_op_data[0]

    newCust = pd.DataFrame(columns=[
        "userid", 'nearestOrderTime', 'nearestCity', 'nearestCountry',
        'nearestContinent', 'totalOrder', 'numberOftype0ne', 'typeOnePer',
        'numberofCity', 'numberofCountry', 'NumberofU', 'NumberofNA',
        'NumberofA', 'NumberofO', 'NumberofAF', 'NumberofSA'
    ])
    idlist = []

    for i in range(0, len(oh_data)):
        record = oh_data.iloc[i]
        uid = record['userid']

        if uid not in idlist:
            idlist.append(uid)
            udata = oh_data[oh_data['userid'] == uid]
            udata = sort(udata, ["orderTime", "orderType"], ascending=False)
            prv_time = []

            totalOrder = 0
            numberOftype0ne = 0
            # Orders per continent code.  Every code, including 'O', is
            # accumulated in this one mapping.
            continent_counts = {
                'U': 0, 'NA': 0, 'A': 0, 'O': 0, 'AF': 0, 'SA': 0
            }
            cityList = []
            countryList = []

            # Summarise this user's order history.
            for j in range(0, len(udata)):

                oudata = udata.iloc[j]
                odtime = oudata['orderTime']
                ot = oudata['orderType']
                ct = oudata['cityN']
                coun = oudata['countryN']
                cn = oudata['continentN']

                # The rows were sorted by order time (descending requested),
                # so the first row is taken as the most recent order.
                if j == 0:
                    nearestCity = ct
                    nearestCou = coun
                    nearestCn = cn
                    nearestOd = odtime

                if odtime not in prv_time:
                    prv_time.append(odtime)
                    # Rows with the same order time are treated as parent and
                    # child orders and counted as a single order.
                    totalOrder = totalOrder + 1
                    # Orders of type 1.
                    if ot == 1:
                        numberOftype0ne = numberOftype0ne + 1
                    # Distinct cities visited.
                    if ct not in cityList:
                        cityList.append(ct)
                    # Distinct countries visited.
                    if coun not in countryList:
                        countryList.append(coun)
                    # Orders per continent.
                    if cn in continent_counts:
                        continent_counts[cn] += 1

            finalud = {
                'userid': uid,
                'nearestOrderTime': nearestOd,
                'nearestCity': nearestCity,
                'nearestCountry': nearestCou,
                'nearestContinent': nearestCn,
                'totalOrder': totalOrder,
                'numberOftype0ne': numberOftype0ne,
                'typeOnePer': numberOftype0ne / totalOrder,
                'numberofCity': len(cityList),
                'numberofCountry': len(countryList),
                'NumberofU': continent_counts['U'],
                'NumberofNA': continent_counts['NA'],
                'NumberofA': continent_counts['A'],
                'NumberofO': continent_counts['O'],
                'NumberofAF': continent_counts['AF'],
                'NumberofSA': continent_counts['SA']
            }

            # NOTE(review): DataFrame.append was removed in pandas 2.0; kept
            # so the output stays exactly as before on the pandas version
            # this file targets.
            newCust = newCust.append(finalud, ignore_index=True)

    # Outer-join the order-history summary with the user profile table.
    up_oh_data = pd.merge(up_data, newCust, how='outer', on='userid')
    categorical = [
        'genderN', 'provinceN', 'nearestCity', 'nearestCountry',
        'nearestContinent'
    ]
    data_1 = up_oh_data[categorical]
    data_2 = pd.get_dummies(data_1)
    # Append the dummy encoding and drop the columns it was built from.
    result_1 = pd.concat([up_oh_data, data_2], axis=1)
    result_1.drop(categorical, axis=1, inplace=True)
    result_1.to_csv(outputpath)
    return
# Action types that get their own counters and per-type statistics.
_ACTION_TYPES = tuple(range(1, 10))


def _summarize_gaps(gaps):
    """Return (average, variance, minimum, maximum) of gaps, or zeros if empty."""
    if not gaps:
        return 0, 0, 0, 0
    return np.average(gaps), np.var(gaps), np.min(gaps), np.max(gaps)


def _action_user_features(uid, action_types, action_times):
    """Build the feature row (a dict) for a single user.

    ``action_types`` and ``action_times`` are parallel lists holding the
    user's rows in the order produced by ``sort(..., ascending=False)`` in
    ``action_statistics`` (presumably most recent action first -- confirm
    against the ``sort`` helper).
    """
    # How often each action type occurs and the row position of its first hit.
    counts = {kind: 0 for kind in _ACTION_TYPES}
    first_index = {}
    for position, kind in enumerate(action_types):
        if kind in counts:
            counts[kind] += 1
            first_index.setdefault(kind, position)

    # Difference between each row's time and the time of the row before it.
    gaps = [
        current - previous
        for previous, current in zip(action_times, action_times[1:])
    ]

    total_steps = sum(counts.values())
    viewed = counts[2] + counts[3] + counts[4]
    bought = counts[9]

    if viewed != 0:
        buy_per_view = bought / viewed
    elif bought == 0:
        buy_per_view = 0
    else:
        buy_per_view = 1

    row = {"userid": uid, 'totalStep': total_steps}
    for kind in _ACTION_TYPES:
        row['step%dN' % kind] = counts[kind]
    for kind in _ACTION_TYPES:
        # Guard: a user whose actions are all outside 1..9 has no steps.
        row['step%dP' % kind] = (
            counts[kind] / total_steps if total_steps else 0)
    row['viewProductN'] = viewed
    for kind in _ACTION_TYPES:
        row['last%dtime' % kind] = (
            action_times[first_index[kind]] if kind in first_index else 0)
    row['buy/viewProductN'] = buy_per_view
    row['buy/n5'] = bought / counts[5] if counts[5] else 0
    row['(2-4)/(1)'] = viewed / counts[1] if counts[1] else 0

    # Type and time of the first four rows; type of the final row.
    for offset in range(4):
        present = offset < len(action_types)
        row['tail%d' % (offset + 1)] = action_types[offset] if present else 0
        row['tailtime%d' % (offset + 1)] = (
            action_times[offset] if present else 0)
    row['head1'] = action_types[-1] if action_types else 0

    # Statistics over all gaps, and over the last three gaps.
    if gaps:
        last_three = gaps[-3:]
        row['maxtime'] = np.max(gaps)
        row['mintime'] = np.min(gaps)
        row['averagetime'] = np.average(gaps)
        row['vartime'] = np.var(gaps)
        row['mediantime'] = np.median(gaps)
        row['tail3ave'] = np.average(last_three)
        row['tail3var'] = np.var(last_three)
    else:
        for key in ('maxtime', 'mintime', 'averagetime', 'vartime',
                    'mediantime', 'tail3ave', 'tail3var'):
            row[key] = 0

    # Per action type: statistics of the gaps between rows 0 .. j-1, where j
    # is the position of that type's first hit (the gap into row j itself is
    # not included, matching the original range(1, j) loop).
    for kind in _ACTION_TYPES:
        position = first_index.get(kind, 0)
        average, variance, lowest, highest = _summarize_gaps(
            gaps[:max(position - 1, 0)])
        row['t%da' % kind] = average
        row['t%dv' % kind] = variance
        row['t%dmini' % kind] = lowest
        row['t%dmax' % kind] = highest
    row['t9av'] = row['t9a'] * row['t9v']

    return row


def action_statistics(action_path, outputpath):
    """Aggregate a user-action log into one feature row per user and save it.

    Reads the comma-separated file at ``action_path`` (columns used:
    ``userid``, ``actionType``, ``actionTime``), orders each user's rows with
    ``sort(udata, ["actionTime", "actionType"], ascending=False)`` and writes
    the per-user statistics to ``outputpath`` as CSV. Users appear in the
    output in order of first appearance in the input.

    Changes relative to the previous implementation:
      * action type 5 was counted twice because its branch was duplicated;
        it is now counted once (affects step5N, totalStep, all stepNP ratios
        and buy/n5);
      * a user with no action in 1..9 yields zero ratios instead of raising
        ZeroDivisionError;
      * rows are collected in a list and turned into a DataFrame once,
        because DataFrame.append no longer exists in pandas >= 2.0.

    NOTE(review): the declared columns 'totalstep', 'near1'..'near9' and
    'nsar8' are never filled (the total is stored under 'totalStep', which is
    appended as an extra column). This is kept as-is so the output schema does
    not change; confirm with downstream consumers before renaming.

    Returns:
        None. The result is only written to ``outputpath``.
    """
    cm_data_raw = pd.read_table(action_path, sep=',', encoding='utf-8')

    columns = [
        "userid", 'totalstep', 'step1N', 'step2N', 'step3N', 'step4N',
        'step5N', 'step6N', 'step7N', 'step8N', 'step9N', 'step1P', 'step2P',
        'step3P', 'step4P', 'step5P', 'step6P', 'step7P', 'step8P', 'step9P',
        'viewProductN', 'last1time', 'last2time', 'last3time', 'last4time',
        'last5time', 'last6time', 'last7time', 'last8time', 'last9time',
        'buy/viewProductN', 'buy/n5', '(2-4)/(1)', 'tail1', 'tail2', 'tail3',
        'tail4', 'head1', 'maxtime', 'mintime', 'averagetime', 'vartime',
        'mediantime', 'tailtime1', 'tailtime2', 'tailtime3', 'tailtime4',
        'tail3ave', 'tail3var', 'near1', 'near2', 'near3', 'near4', 'near5',
        'near6', 'near7', 'nsar8', 'near9', 't1a', 't1v', 't1mini', 't1max',
        't2a', 't2v', 't2mini', 't2max', 't3a', 't3v', 't3mini', 't3max',
        't4a', 't4v', 't4mini', 't4max', 't5a', 't5v', 't5mini', 't5max',
        't6a', 't6v', 't6mini', 't6max', 't7a', 't7v', 't7mini', 't7max',
        't8a', 't8v', 't8mini', 't8max', 't9a', 't9v', 't9mini', 't9max',
        't9av'
    ]

    rows = []
    # sort=False keeps the users in order of first appearance in the input.
    for uid, udata in cm_data_raw.groupby('userid', sort=False):
        udata = sort(udata, ["actionTime", "actionType"], ascending=False)
        rows.append(
            _action_user_features(uid, udata['actionType'].tolist(),
                                  udata['actionTime'].tolist()))

    # Keys that are not declared above (currently only 'totalStep') are kept
    # as trailing columns, as the row-by-row append used to do.
    extra = [key for key in (rows[0] if rows else ()) if key not in columns]
    newCust = pd.DataFrame(rows, columns=columns + extra)

    newCust.to_csv(outputpath)

    return
def rating_stantistics(inputpath, outputpath):
    """Aggregate a rating log into one summary row per user and save it.

    Reads the comma-separated file at ``inputpath`` (columns used:
    ``userid``, ``orderid``, ``rating``), orders each user's rows with
    ``sort(udata, ["orderid"], ascending=False)`` and writes, per user, the
    rating sum, count, average and the number of ratings equal to
    1, 2, 3, 3.67, 4, 4.33 and 5 to ``outputpath`` as CSV. Users appear in
    the output in order of first appearance in the input.

    ``lowrate`` is the number of ratings equal to 1, 2 or 3; ``highrate`` is
    the number of ratings equal to 4, 4.33 or 5.

    Changes relative to the previous implementation:
      * a 4.33 rating used to overwrite the 3.67 counter instead of
        incrementing the 4.33 counter, so 'numberof433' was always 0 and
        'numberof367' / 'highrate' were wrong; each value now has its own
        count;
      * rows are collected in a list and turned into a DataFrame once,
        because DataFrame.append no longer exists in pandas >= 2.0.

    Returns:
        None. The result is only written to ``outputpath``.
    """
    cm_data_raw = pd.read_table(inputpath, sep=',', encoding='utf-8')

    columns = [
        "userid", 'totalrate', 'totalnumber', 'averate', 'numberof1',
        'numberof2', 'numberof3', 'numberof367', 'numberof433', 'numberof4',
        'numberof5', 'lowrate', 'highrate'
    ]

    rows = []
    # sort=False keeps the users in order of first appearance in the input.
    for uid, udata in cm_data_raw.groupby('userid', sort=False):
        udata = sort(udata, ["orderid"], ascending=False)
        ratings = udata['rating'].tolist()

        # Summed in row order, starting from 0, like the original loop.
        totalrate = sum(ratings)
        totalnumber = len(ratings)

        numberof1 = ratings.count(1)
        numberof2 = ratings.count(2)
        numberof3 = ratings.count(3)
        numberof367 = ratings.count(3.67)
        numberof433 = ratings.count(4.33)
        numberof4 = ratings.count(4)
        numberof5 = ratings.count(5)

        rows.append({
            "userid": uid,
            'totalrate': totalrate,
            'totalnumber': totalnumber,
            # A group always has at least one row, so this cannot divide by 0.
            'averate': totalrate / totalnumber,
            'numberof1': numberof1,
            'numberof2': numberof2,
            'numberof3': numberof3,
            'numberof367': numberof367,
            'numberof433': numberof433,
            'numberof4': numberof4,
            'numberof5': numberof5,
            'lowrate': numberof1 + numberof2 + numberof3,
            'highrate': numberof4 + numberof5 + numberof433
        })

    newCust = pd.DataFrame(rows, columns=columns)

    newCust.to_csv(outputpath)

    return
Beispiel #14
0
# Names of all entries found under `path`
csv_files = os.listdir(path)

# Independent variable, derived from the file names
frequency = functions.namesList(csv_files)

# Per-file results, appended in the order functions.sort() yields the files
rangeList1, rangeList2 = [], []
meanList1, meanList2 = [], []

# For every file: range, then mean, of column 0 and of column 1
for file in functions.sort(csv_files):
    file_location = path + file

    analog1Range = functions.getRange(
        functions.convertToFloat(functions.columnList(file_location, 0)))
    analog2Range = functions.getRange(
        functions.convertToFloat(functions.columnList(file_location, 1)))
    rangeList1.append(analog1Range)
    rangeList2.append(analog2Range)

    analog1Mean = functions.mean(
        functions.convertToFloat(functions.columnList(file_location, 0)))
    analog2Mean = functions.mean(
        functions.convertToFloat(functions.columnList(file_location, 1)))
    meanList1.append(analog1Mean)
    meanList2.append(analog2Mean)

    print("File: " + file)

#print(functions.sort(csv_files))