def addSource(request):
    """Render the "add source" page and create a Source from a valid POST.

    For a POST request the form is bound to the submitted data and files.
    When it validates, the uploaded file (if one was sent under the ``file``
    key) is handed to ``handleUploadedFile``, a ``Source`` is saved and linked
    to the selected companies and tags, and the response is a redirect to
    ``/source/<id>``. When it does not validate, the bound form is passed to
    the template. For any other method an unbound form is rendered.

    Tags are submitted as one comma-separated string. Each piece is stripped
    of surrounding whitespace and blank pieces are ignored, so input such as
    ``"a, b,"`` produces the tags ``a`` and ``b`` rather than ``" b"`` and an
    empty-named tag.
    """
    if request.method == 'POST':
        # Bind the form to the submitted fields and uploaded files.
        form = SourceForm(request.POST, request.FILES)
        if form.is_valid():
            # Only handle the upload when the expected key is present; other
            # uploaded keys must not trigger a lookup error.
            if 'file' in request.FILES:
                handleUploadedFile(request.FILES['file'])

            data = form.cleaned_data
            newSource = Source(
                source=data['source'],
                url=data['url'],
                title=data['title'],
                insight=data['insight'],
                author=data['author']
            )
            # The instance must be saved before many-to-many rows can be added.
            newSource.save()

            for company in data['companies']:
                newSource.company.add(company)

            for rawTag in data['tags'].split(','):
                tagName = rawTag.strip()
                if not tagName:
                    # Skip blanks from empty input or stray commas.
                    continue
                tag, _created = Tag.objects.get_or_create(name=tagName)
                newSource.tags.add(tag)

            newSource.save()
            # Redirect to the source detail page.
            return HttpResponseRedirect('/source/' + str(newSource.id))
    else:
        # Unbound form for the initial page load.
        form = SourceForm()

    segment_list = Segment.objects.all().order_by('name')
    recent_news = Source.objects.all().order_by('-modified')[:5]
    return render_to_response(
        'market/source/add.html',
        {
            'form': form,
            'segment_list': segment_list,
            'recent_news': recent_news
        },
        context_instance=RequestContext(request)
    )
# Ordered (regex, display name) pairs used to label a URL with its publication.
# The first pattern found in the URL wins, so a pattern that is a substring of
# another must come after it ('nytimes' is listed before 'time').
_SOURCE_PATTERNS = (
    (r'crunchbase', 'CrunchBase'),
    (r'techcrunch', 'TechCrunch'),
    (r'allthingsd', 'AllThingsD'),
    (r'gigaom', 'GigaOM'),
    (r'readwriteweb', 'ReadWriteWeb'),
    (r'pcmag', 'PC Magazine'),
    (r'searchenginewatch', 'Search Engine Watch'),
    (r'mashable', 'Mashable'),
    (r'quora', 'Quora'),
    (r'businessinsider', 'Business Insider'),
    (r'quantcast', 'Quantcast'),
    (r'nytimes', 'NY Times'),
    (r'time', 'Time'),
    (r'tumblr', 'Tumblr'),
    (r'twitter', 'Twitter'),
    (r'fastcompany', 'Fast Company'),
    (r'venturebeat', 'Venture Beat'),
    # NOTE(review): was r'wsg', presumably a typo for the wsj.com domain.
    (r'wsj', 'Wall Street Journal'),
    (r'dowjones', 'DowJones.com'),
    (r'zdnet', 'ZD Net'),
    (r'jackmyers', 'Jackmyers.com'),
    (r'businessweek', 'BusinessWeek'),
    (r'guardian', 'Guardian'),
    (r'paidcontent', 'PaidContent.org'),
    (r'techaviv', 'Techaviv'),
    (r'thenextweb', 'TheNextWeb'),
    (r'wired', 'Wired'),
    (r'scobleizer', 'Scobleizer'),
    (r'cnet', 'CNet'),
    (r'buzzmachine', 'Buzzmachine'),
    (r'arstechnica', 'Ars Technica'),
    (r'adage', 'Ad Age'),
    (r'ycombinator', 'Hacker News'),
)


def _matchSourceName(url):
    """Return the display name of the first pattern found in url, or ''."""
    for pattern, name in _SOURCE_PATTERNS:
        if re.search(pattern, url):
            return name
    return ''


def importSources():
    """Import sources from ``ext_data/wordnik_sources.csv``.

    Every row whose ``Company`` column is not ``'General'`` is processed: its
    ``Source`` column is treated as the URL, a publication name is derived
    from that URL, and a ``Source`` record is saved and linked to the
    ``Company`` whose name equals the row's ``Company`` column.

    Returns:
        The list of CSV rows (dicts) whose company could not be found, so
        they can be reviewed manually. Earlier versions built this list but
        returned nothing.
    """
    # Rows whose company has no match in the database.
    badSources = []

    # The context manager closes the file even if a row raises.
    with open('ext_data/wordnik_sources.csv', 'rU') as csvFile:
        for row in csv.DictReader(csvFile):
            # General articles are skipped here and handled manually.
            if row['Company'] == 'General':
                continue

            # The CSV's 'Source' column holds the article URL.
            url = row['Source']
            source = _matchSourceName(url)

            try:
                comp = Company.objects.get(name=row['Company'])
            except ObjectDoesNotExist:
                # No matching company: keep the row for later review.
                badSources.append(row)
                continue

            print(row['Company'])
            s = Source(
                source=unicode(source, 'utf-8', 'replace'),
                url=unicode(url, 'utf-8', 'replace'),
                title=unicode(row['Title'], 'utf-8', 'replace')
            )
            # Save before adding the many-to-many company link.
            s.save()
            s.company.add(comp)
            s.save()

    return badSources