BASE_FOLDER = os.sep.join(os.path.realpath(__file__).split(os.sep)[0:-1])+os.sep sys.path.append(BASE_FOLDER+"proxy") from proxyconnection import * from sets import Set from dataFileUtils import format_set_data_CSV from dataFileUtils import getEntitiesIds users_file = BASE_FOLDER+"captured"+os.sep+"members_data.csv" sets_from_items_file = BASE_FOLDER+"captured"+os.sep+"outfits_from_garments_file.csv" items_file = BASE_FOLDER+"captured"+os.sep+"garments.csv" visited_items_file = BASE_FOLDER+"captured"+os.sep+"visited_garments.csv" proxy = MyProxy(BASE_FOLDER+"proxy"+os.sep+"working.csv") itemgetter = ItemGetter(proxy) items_ids = getEntitiesIds(items_file) visited_items_ids = getEntitiesIds(visited_items_file) items_ids.difference(visited_items_ids) while items_ids: print "visited: ", len(visited_items_ids) print "new: ", len(items_ids) items_ids = list(items_ids) item_id = items_ids.pop(0) items_ids = Set(items_ids) print "Current: ",item_id visited_items_ids.add(item_id) item_data = itemgetter.get_data(item_id) f = open(sets_from_items_file,'a')
setgetter = SetGetter(proxy) def load_users_sets(): sets = Set() f = open(users_file,'r') for user in f: for setId in user.split(";")[1].split(","): try: sets.add(int(setId)) except: pass f.close() return sets visited_sets = getEntitiesIds(set_file) #user_sets = load_users_sets() remaining_sets = getEntitiesIds( BASE_FOLDER+"captured"+os.sep+"outfits_from_garments_file.csv") new_sets = remaining_sets.difference(visited_sets) while new_sets: print "visited: ", len(visited_sets) print "new: ", len(new_sets) new_sets = list(new_sets) set_id = new_sets.pop(0) new_sets = Set(new_sets) print "Current: ",set_id visited_sets.add(set_id) set_data = setgetter.get_data(set_id) print set_data
def get_image(itemid):
    # Download the large ("size/l") thumbnail for *itemid* from Polyvore's
    # image CDN and save it as <images_dir>/<itemid>.jpg.
    # NOTE(review): relies on ``urllib`` and ``images_dir`` being defined at
    # module level outside this chunk — confirm before running.
    url = "http://embed.polyvoreimg.com/cgi/img-thing/size/l/tid/"+str(itemid)+".jpg"
    image=urllib.URLopener()
    image.retrieve(url,images_dir+os.sep+str(itemid)+".jpg")


"""
For each pending item (not already visited), fetch its description.
If the item is a garment (not a decorative image), store the gathered
information in a file and its picture in a folder.
Otherwise, store the item's ID in no_items_file.
"""
# Ids already processed: items captured as garments plus items previously
# rejected as non-garments.
visited_items = getEntitiesIds(items_file)
visited_items = visited_items.union(getEntitiesIds(no_items_file))
# Category ids that identify an item as an actual garment.
valid_categories = Taxonomy().getAllIds()
sets_items = load_sets_items()
print len(sets_items)
new_items = sets_items.difference(visited_items)
print len(new_items)
# Shuffle so repeated runs don't hit the same ids in the same order.
new_items = list(new_items)
random.shuffle(new_items)
print len(new_items)
while new_items:
    #sleep(randint(1,5))
    print "visited: ", len(visited_items)
    print "new: ", len(new_items)
    # NOTE(review): loop body continues beyond this chunk.