Ejemplo n.º 1
0
def get_cookie(email,passw,filter_):
	"""Log in to vk.com and build the Cookie header value for later requests.

	Arguments:
		email   -- account e-mail, sent as the 'email' form field
		passw   -- account password, sent as the 'pass' form field
		filter_ -- news filter string, appended verbatim as the value of
		           'remixnews_types' (must be a string)

	Returns the cookie string, or None (after printing 'None :(') when the
	session id cannot be extracted from the login response.

	NOTE(review): the credentials are posted to a plain http:// URL.
	NOTE(review): get_between is defined elsewhere; from its call sites it
	presumably returns the text between two delimiters, or None when they are
	not found -- confirm.
	"""
	url = 'http://login.vk.com/?act=login'

	data = urllib.urlencode({
		'expire' : '',
		'vk': '1',
		'email' : email,
		'pass' : passw
		})

	headers = {
		'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2',
		'Accept': 'text/html,application/xhtml+xml,application/xml',
		'Accept-Language': 'en-us,en',
		'Accept-Charset': 'ISO-8859-1,utf-8',
		'Referer': 'http://vk.com/index.php',
		'Cookie': 'remixlang=3; remixchk=5',
		'Content-Type': 'application/x-www-form-urlencoded',
		'Content-Length': str(len(data))
		}

	response = urllib2.urlopen(urllib2.Request(url, data, headers))
	try:
		body = response.read()
	finally:
		# always release the connection, even if reading fails
		response.close()

	# the session id sits in the value attribute of the tag that mentions 's'
	sid_field = get_between(body,0,'\'s\'','/>')
	if sid_field is None:
		print('None :(')
		return None

	sid = get_between(sid_field,0,'value=\'','\'')
	if sid is None:
		# tag found but without a value attribute: fail the same way instead
		# of raising TypeError on the concatenation below
		print('None :(')
		return None

	return 'remixlang=3; remixchk=5; remixsid=' + sid + '; remixnews_privacy_filter=0; remixnews_types=' + filter_
Ejemplo n.º 2
0
def pars_all(page, max_iterations=55):
	"""Collect the statuses of every '<div class="feedDay">' section of page.

	Arguments:
		page           -- page text to scan
		max_iterations -- upper bound on the number of day sections processed
		                  (default 55, the previously hard-coded limit)

	Each day section is passed, together with its date text, to pars_fragment
	and the result is appended to the returned list.

	Returns the list of statuses (empty when the page has no day section), or
	None after printing a diagnostic when a date cannot be extracted, when
	pars_fragment yields something that cannot be used to extend a list, or
	when the page has more than max_iterations day sections.

	NOTE(review): get_between and pars_fragment are defined elsewhere;
	get_between presumably returns the text between two delimiters, searching
	from the given offset, or None when they are not found -- confirm.
	"""
	marker = '<div class="feedDay">'
	statuses = []
	date = get_between(page,0,marker,'</div>')
	initial_date = 0

	for _ in range(max_iterations):
		day_start = page.find(marker,initial_date)
		if day_start == -1:
			break
		if initial_date != 0:
			# NOTE(review): the search deliberately starts a little before
			# initial_date; kept as in the original -- confirm against
			# get_between's offset handling
			date = get_between(page,initial_date - len(marker) - 2,marker,'</div>')
		if not date:
			print('can\'t pars date [pars_all function]')
			return None

		fragment = get_between(page,initial_date,marker,marker)
		is_last = fragment is None
		if is_last:
			# no following day marker: the last section runs to the end of page
			fragment = page[day_start:]

		try:
			statuses.extend(pars_fragment(fragment,date))
		except TypeError:
			print('pars_fragment returns invalid value [pars_all function]')
			return None

		if is_last:
			break

		# the next iteration starts searching from the following day marker
		initial_date = page.find(marker,day_start + 1)
	else:
		# the loop ran out of iterations without reaching the last section;
		# finishing exactly on the final allowed iteration is NOT an error
		print('pars error [pars_all function]')
		return None
	return statuses
Ejemplo n.º 3
0
def pars_status(page, initial):
	"""Extract uid, name, status and time of one feed story from a vk page.

	All news except statuses must be disabled on the page.

	Arguments:
		page    -- page text to scan
		initial -- offset in page from which the search starts

	Returns a 4-tuple (uid, name, status, time):
		('', '', '', '')     -- no 'feedStory' occurs at or after initial
		('0', '0', '0', '0') -- the story cell is missing or malformed (a
		                        diagnostic is printed) or the status is empty
		otherwise the extracted values; time is None when the time cell
		cannot be found.

	NOTE(review): get_between is defined elsewhere; from its call sites it
	presumably returns the text between two delimiters, searching from the
	given offset, or None when they are not found -- confirm.
	"""
	if page.find('feedStory',initial) == -1:
		return '', '', '', ''

	story = get_between(page, initial,'<td class="feedStory">','</td>')
	if story is None:
		# 'feedStory' occurs but not as a complete table cell; report it like
		# any other malformed story instead of raising TypeError in search()
		print('invalid status format [pars_status function]')
		return '0','0','0','0'

	story_regexp = re.compile(r'href="/(?P<uid>.+?)">(?P<name>.+?)</a>(?P<status_with_whitespaces>.*?)(<div|$)',re.DOTALL)
	story_match = story_regexp.search(story)
	if story_match is None:
		print('invalid status format [pars_status function]')
		return '0','0','0','0'

	# remove leftmost and rightmost whitespaces
	status = story_match.group('status_with_whitespaces').strip()
	if not status:
		return '0','0','0','0'

	time_cell = get_between(page,initial,'<td class="feedTime">','</td>')
	if time_cell is None:
		# no time cell: do not feed None back into get_between
		time_text = None
	else:
		time_text = get_between(time_cell,0,'<div>','</div>')

	return story_match.group('uid'), story_match.group('name'), status, time_text