df=df.groupby("time").count() index = df.index lock.acquire() if z=="malware": for i in index: malware_dict[i] = malware_dict.get(i,0)+int(df[df.index==i].iloc[0].sha256) else: for i in index: benign_dict[i] = benign_dict.get(i,0)+int(df[df.index==i],iloc[0].sha256) lock.release() if __name__ == '__main__': lock = Lock() for z in [malware_path,benign_path]: pool = Pool(16) for i in range(16): for j in range(16): for k in range(16): current_dir = z +'/'.join([hex(i),hex(j),hex(k)])+'/' pool.appy_async(func=get_year_count,args=(current_dir,)) pool.close() pool.join() if z == malware_path: malware_dict['type'] = ['malware' for i in range(len(malware_dict['year'])] else: benign_dict['type'] = ['benign' for i in range(len(benign_dict['year'])] total = pd.concat([pd.DataFrame(malware_dict),pd.DataFrame(benign_dict)]) total.to_csv("/data/total_list.csv",index=False)