def weeklydataset_sg_ndata(filesource, label_index):
    """Load a request CSV and split it column-wise into features and labels.

    Each row is bucketed by the weekday of the date held in column 1,
    columns 5, 6 and 7 are converted to ``float``, and every weekday bucket
    is ordered by (hour, minute) of that same date.  The buckets are then
    walked Monday through Sunday and each row is split by column position:
    positions contained in ``label_index`` feed the label columns, all other
    positions feed the feature columns.

    Args:
        filesource: path of the CSV file (comma separated, ``"`` quoted).
        label_index: collection of column positions to treat as labels.

    Returns:
        A ``(vectorDataSet, label)`` tuple.  Both are lists of columns
        (one inner list per column); ``label`` has ``len(label_index)``
        columns and ``vectorDataSet`` has the remaining ones.  The total
        column count is taken from the last row read from the file.

    Rows that raise while being parsed are skipped and their 1-based row
    number is printed.
    """
    # One bucket per weekday, Monday first.
    # NOTE(review): `weekday` is presumably calendar.weekday (Monday == 0)
    # and `toepoch` presumably returns epoch seconds -- confirm.
    weekindex = [[] for _ in range(7)]

    count = 0
    nline = 0
    # "rb" is the mode the Python 2 csv module expects.  The with-block
    # guarantees the handle is released even when reading fails.
    with open(filesource, "rb") as handle:
        trainreader = csv.reader(handle, delimiter=',', quotechar='"')
        for row in trainreader:
            nline += 1
            try:
                count = len(row)
                standarddate = time.gmtime(toepoch(row[1]))
                row[5] = float(row[5])
                row[6] = float(row[6])
                row[7] = float(row[7])
                day = weekday(standarddate[0], standarddate[1],
                              standarddate[2])
                weekindex[day].append(row)
            except Exception:
                # Best effort: report the offending row and keep going, but
                # let KeyboardInterrupt / SystemExit propagate.
                print(nline)

    vectorDataSet = [[] for _ in range(count - len(label_index))]
    label = [[] for _ in range(len(label_index))]
    label_positions = set(label_index)

    def hour_minute(row):
        # A single (hour, minute) key gives the same order as the stable
        # minute-then-hour double sort, with half the date parsing.
        stamp = time.gmtime(toepoch(row[1]))
        return stamp[3], stamp[4]

    for bucket in weekindex:
        for row in sorted(bucket, key=hour_minute):
            label_col = 0
            vector_col = 0
            for position, value in enumerate(row):
                if position in label_positions:
                    label[label_col].append(value)
                    label_col += 1
                else:
                    vectorDataSet[vector_col].append(value)
                    vector_col += 1

    return vectorDataSet, label
def aggregateby10mins_sg_mcmc_ndata(timestamps, numbercluster):
    """Count requests per 10-minute slot of the week in two series and plot.

    Every timestamp is mapped to a (weekday, 10-minute slot) cell.  Each
    cell holds two counters: index 0 counts timestamps whose day of the
    month is 7 or lower, index 1 counts all the others.  Both series are
    drawn as a scatter plot (blue / red) that is saved as a PNG named
    ``aggregation_cluster_<numbercluster>``.

    Args:
        timestamps: iterable of values accepted by ``toepoch``.
        numbercluster: integer used to build the output file name.

    Returns:
        A list of 1008 ``[count_first_7_days, count_other_days]`` pairs,
        Monday slot 0 first and Sunday slot 143 last.
    """
    slots_per_day = 144  # 24 hours * 6 ten-minute slots

    # week[weekday][slot] -> [count for day-of-month <= 7, count otherwise]
    week = [[[0, 0] for _ in range(slots_per_day)] for _ in range(7)]

    for line in timestamps:
        # NOTE(review): toepoch presumably returns epoch seconds -- confirm.
        standarddate = time.gmtime(toepoch(line))
        dweek = standarddate[6]      # tm_wday, Monday == 0
        hour = standarddate[3]
        minute = standarddate[4]
        series = 0 if standarddate[2] <= 7 else 1
        # Floor division keeps the slot index an int under true division.
        week[dweek][hour * 6 + minute // 10][series] += 1

    # Flatten Monday..Sunday; the pairs are shared with `week`, not copied.
    target = [pair for day in week for pair in day]

    x = list(range(slots_per_day * 7))
    y1, y2 = zip(*target)
    print("len x = %f" % len(x))
    print("len y1 = %f" % len(y1))
    print("len y2 = %f" % len(y2))

    fig = figure()
    ax1 = fig.add_subplot(1, 1, 1)
    ax1.scatter(x, y1, c='b')
    ax1.scatter(x, y2, c='r')
    # x is the 10-minute slot index; the label text is kept unchanged.
    ax1.set_xlabel('minute of the week')
    ax1.set_ylabel('Number of requests')
    fig.savefig("aggregation_cluster_%d" % (numbercluster), format='png')
    return target
def aggregateby10mins_sg_ndata_avg(timestamps, numbercluster):
    """Count requests per 10-minute slot of the week and plot the result.

    Every timestamp increments the counter of its (weekday, 10-minute slot)
    cell.  The counts are drawn as a scatter plot that is saved as a PNG
    named ``aggregation_cluster_<numbercluster>``.

    Args:
        timestamps: iterable of values accepted by ``toepoch``.
        numbercluster: integer used to build the output file name.

    Returns:
        ``(x, target)`` where ``x`` is ``[0, 1, ..., 1007]`` and ``target``
        is the matching list of counts, Monday slot 0 first and Sunday
        slot 143 last.  Despite the ``_avg`` suffix the counts are raw
        totals; no averaging is applied.
    """
    slots_per_day = 144  # 24 hours * 6 ten-minute slots
    week = [[0] * slots_per_day for _ in range(7)]

    for line in timestamps:
        # NOTE(review): toepoch presumably returns epoch seconds -- confirm.
        standarddate = time.gmtime(toepoch(line))
        dweek = standarddate[6]      # tm_wday, Monday == 0
        hour = standarddate[3]
        minute = standarddate[4]
        # Floor division keeps the slot index an int under true division.
        week[dweek][hour * 6 + minute // 10] += 1

    # Flatten Monday..Sunday into one series.
    target = [hits for day in week for hits in day]
    x = list(range(slots_per_day * 7))

    fig = figure()
    ax1 = fig.add_subplot(1, 1, 1)
    # Plot the flat series so x and y have the same shape; it holds the
    # same values, in the same order, as the nested per-day lists.
    ax1.scatter(x, target)
    ax1.axis([0, max(x) + 10, 0, max(target) + 100])
    # x is the 10-minute slot index; the label text is kept unchanged.
    ax1.set_xlabel('minute of the week')
    ax1.set_ylabel('Number of requests')
    fig.savefig("aggregation_cluster_%d" % (numbercluster), format='png')
    return x, target
def aggregateby30sec_sg_ndata_avg(timestamps):
    """Count requests per 30-second slot of the week and show the result.

    Every timestamp increments the counter of its (weekday, 30-second slot)
    cell.  The counts are drawn as a scatter plot and displayed with
    ``show()``.

    Args:
        timestamps: iterable of values accepted by ``toepoch``.

    Returns:
        ``(x, target)`` where ``x`` is ``[0, 1, ..., 20159]`` and ``target``
        is the matching list of counts, Monday slot 0 first and Sunday
        slot 2879 last.  Despite the ``_avg`` suffix the counts are raw
        totals; no averaging is applied.
    """
    slots_per_day = 2880  # 24 hours * 60 minutes * 2 half-minute slots
    week = [[0] * slots_per_day for _ in range(7)]

    for line in timestamps:
        # NOTE(review): toepoch presumably returns epoch seconds -- confirm.
        standarddate = time.gmtime(toepoch(line))
        dweek = standarddate[6]      # tm_wday, Monday == 0
        hour = standarddate[3]
        minute = standarddate[4]
        # Select the first or second half of the minute.  Without this
        # offset every odd slot stays empty and the resolution is really
        # one minute.  tm_sec may exceed 59, so clamp to the second half.
        half = min(standarddate[5] // 30, 1)
        week[dweek][hour * 120 + minute * 2 + half] += 1

    # Flatten Monday..Sunday into one series.
    target = [hits for day in week for hits in day]
    x = list(range(slots_per_day * 7))

    fig = figure()
    ax1 = fig.add_subplot(1, 1, 1)
    # Plot the flat series so x and y have the same shape; it holds the
    # same values, in the same order, as the nested per-day lists.
    ax1.scatter(x, target)
    ax1.axis([0, max(x) + 10, 0, max(target) + 100])
    # x is the 30-second slot index; the label text is kept unchanged.
    ax1.set_xlabel('minute of the week')
    ax1.set_ylabel('Number of requests')
    show()
    return x, target