def writeData(data, fname):
    """Store a list of tuples as a CSV file inside the ``result/`` folder.

    The actual writing is delegated to ``dr.tupleToCSV``; its third
    argument is passed as ``None``.

    :param data: The rows to be written
    :type data: A list of tuple
    :param fname: Name of the output file, relative to ``result/``
    :type fname: String
    :returns: None
    """
    target = "result/" + fname
    dr.tupleToCSV(data, target, None)
def statistic(f, t):
    """Compute summary statistics for a range of years and save them as CSV.

    The movie records are loaded twice through ``readDataInRange``: once
    grouped (iterated in sorted key order) and once as a flat list. From them
    the function derives

    * the number of distinct actors, writers, directors, genres, countries
      and languages,
    * the number of movies,
    * average / max / min rating (ratings equal to 0 are ignored),
    * sum / average / min / max runtime (entries equal to ``"Unknown"`` or
      without leading digits are ignored).

    The (name, value) pairs are written with ``dr.tupleToCSV`` to
    ``result/statistic_in_<f>`` when ``f == t`` and to
    ``result/statistic_in_<f>_<t>`` otherwise.

    NOTE(review): the previous documentation stated that the input is read
    from the ``imdb_year.csv`` files in the data folder; that happens inside
    ``readDataInRange`` and cannot be confirmed from this function.

    :param f: From year
    :type f: Integer
    :param t: To year
    :type t: Integer
    :returns: None
    :raises ValueError: raised by ``numpy.max`` / ``numpy.min`` when no
        non-zero rating or no usable runtime exists in the range
    """
    # Position of every multi-valued attribute inside a movie record.
    columns = {
        u'actors': 11,
        u'writers': 6,
        u'directors': 9,
        u'genres': 13,
        u'countries': 5,
        u'languages': 3,
    }
    data = readDataInRange(f, t, True, True)
    datalist = readDataInRange(f, t, False, False)

    bigres = {
        u'actors': [],
        u'writers': [],
        u'directors': [],
        u'genres': [],
        u'countries': [],
        u'languages': [],
    }
    for y in sorted(data.keys()):
        details = data[y]
        for key, index in columns.items():
            bigres[key].extend(listToList([x[index] for x in details]))

    # do top-n analyses (needs the complete value lists, so it has to run
    # before the lists are replaced by their distinct counts below)
    top_n_produktiv_something_in_range(f, t, bigres)

    # Replace every collected list by the number of distinct values in it.
    for key, values in list(bigres.items()):
        bigres[key] = len(set(values))

    bigres["movies"] = len(datalist)

    # Parse each rating once; a rating of 0 is excluded from the aggregates.
    parsed_ratings = [float(x[1][2]) for x in datalist]
    rating = [r for r in parsed_ratings if r != 0]
    maxrating = numpy.max(rating)
    minrating = numpy.min(rating)
    bigres["average_rating"] = numpy.average(rating)
    bigres["max_rating"] = maxrating
    bigres["min_rating"] = minrating

    # The runtime is taken from the leading digits of the runtime field.
    p = reg.compile(r"\d+")
    runtime = []
    for x in datalist:
        raw = x[1][3]
        if raw == "Unknown":
            continue
        found = p.match(raw)
        if found is None:
            # No leading digits: skip the entry like "Unknown" instead of
            # failing with AttributeError on ``None.group``.
            continue
        runtime.append(float(found.group(0)))

    bigres["runtime_sum"] = numpy.sum(runtime)
    bigres["runtime_average"] = numpy.average(runtime)
    bigres["runtime_min"] = numpy.min(runtime)
    bigres["runtime_max"] = numpy.max(runtime)

    if t == f:
        fn = "statistic_in_{f}".format(f=f)
    else:
        fn = "statistic_in_{f}_{t}".format(t=t, f=f)
    dr.tupleToCSV(list(bigres.items()), "result/" + fn)