Ejemplo n.º 1
0
def main():
  '''
  Interactive entry point of the Page Rank program.

  Prompts for the input format (csv or snap), the file name and the back end
  to run, parses the graph, looks up the file's entry in the table returned
  by specs() and pipes the edge list to the external ./pr_phi binary.

  Ex:
    > python lab3.py [-w]
    CSC 466: Lab 3 - PageRank & Link Analysis
    Parse:
    1) csv
    2) snap
    (User enters 1 or 2)
    File name: (User enters file name here)
    Xeon Phi, Cuda or Both? (x/c/b): (User enters x, c or b)

  The optional '-w' flag (first command-line argument only) selects
  parser.parse_weighted_csv instead of parser.parse_csv for csv input.

  Note: -w doesn't quite work at the moment. Please ignore it for now.

  The file's base name is used as the key into the specs() table; a file
  that is not listed there makes the lookup fail (presumably with KeyError
  if the table is a plain dict -- confirm against specs()).
  '''

  fileSpecs = specs()

  # '-w' is only honoured as the first command-line argument.
  is_weighted = len(sys.argv) > 1 and sys.argv[1] == '-w'

  # Menu
  print('CSC 466: Lab 3 - PageRank & Link Analysis')
  parse_menu = raw_input('Parse:\n' +
                         '1) csv\n' +
                         '2) snap\n'
                        )
  file_name = raw_input('File name: ')
  version = raw_input('Xeon Phi, Cuda or Both? (x/c/b): ')

  # Wrong input
  if parse_menu not in ('1', '2'):
    print('Invalid input - exiting')
    return

  # PARSING
  # Note: The algorithm is the same, just parsing is different.
  if parse_menu == '2':
    parse = parser.parse_snap
  elif is_weighted:
    parse = parser.parse_weighted_csv
  else:
    parse = parser.parse_csv

  print('Parsing/Creating Graph...')
  start = time.time()    # Tracking time
  (nodes, out_degrees, in_degrees, names) = parse(file_name)
  end = time.time()
  print('Parse/Graph Set-up Time: ' + str(end - start) + ' seconds')

  # Keep only the base name. rfind() returns the int -1 when there is no
  # '/', in which case the slice starts at 0 and keeps the whole string, so
  # no explicit test is needed (the old test compared against the string
  # '-1' and was therefore always true).
  file_name = file_name[file_name.rfind('/') + 1:]

  (numIterations, numNodes, numEdges) = fileSpecs[file_name]
  print('%s %s %s' % (numIterations, numNodes, numEdges))
  # NOTE(review): hard-coded probe of node 12028; looks like leftover debug
  # output -- kept so the program's output is unchanged.
  print(out_degrees.get(12028))

  # Call C Program
  if version in ('x', 'b'):
    p = subprocess.Popen(['./pr_phi', str(numNodes), str(numEdges), str(numIterations)],
                         stdout=subprocess.PIPE, stdin=subprocess.PIPE)

    # One "<node> <linked node>" line per entry of in_degrees[node]; nodes
    # without an in_degrees entry contribute nothing.
    edge_lines = []
    for node in nodes:
      linked = in_degrees.get(node)
      if linked is not None:
        edge_lines.extend('%d %d\n' % (int(node), int(other)) for other in linked)

    # Hand the whole input to communicate() instead of writing to p.stdin
    # directly: communicate() interleaves writing and reading, so the child
    # cannot block on a full stdout pipe while we are still writing. It also
    # closes stdin itself. (This file targets Python 2, where str is the
    # byte string the pipe expects.)
    output = p.communicate(''.join(edge_lines))[0]
    output = output[:-1]    # Drop the last character of the child's output

    useNames = parse_menu == '1' or file_name == "wiki-Vote.txt"
    print(str(useNames))
    printPageRankValues(output, useNames, names)

  if version in ('c', 'b'):
    # NOTE(review): cuda_command is not defined in this function, so it must
    # come from module scope -- confirm. The process is started but never
    # fed any input nor waited on; the CUDA path looks unfinished.
    p = subprocess.Popen(cuda_command, stdout=subprocess.PIPE, stdin=subprocess.PIPE)
Ejemplo n.º 2
0
def main():
  '''
  Interactive entry point of the Page Rank program.

  Prompts for the input format (csv or snap) and the file name, parses the
  graph, runs page rank through the pagerank module and prints how long
  parsing and ranking took plus the number of page rank iterations.

  Ex:
    > python lab3.py [-w]
    CSC 466: Lab 3 - PageRank & Link Analysis
    Parse:
    1) csv
    2) snap
    (User enters 1 or 2)
    File name: (User enters file name here)

  The optional '-w' flag (first command-line argument only) selects
  parser.parse_weighted_csv instead of parser.parse_csv for csv input.

  Note: -w doesn't quite work at the moment. Please ignore it for now.
  '''

  # '-w' is only honoured as the first command-line argument.
  is_weighted = len(sys.argv) > 1 and sys.argv[1] == '-w'

  # Menu
  print('CSC 466: Lab 3 - PageRank & Link Analysis')
  parse_menu = raw_input('Parse:\n' +
                         '1) csv\n' +
                         '2) snap\n'
                        )
  file_name = raw_input('File name: ')

  # Wrong input
  if parse_menu not in ('1', '2'):
    print('Invalid input - exiting')
    return

  # PARSING
  # Note: The algorithm is the same, just parsing is different, so only the
  # parser is chosen per format and everything after it is shared.
  if parse_menu == '2':
    parse = parser.parse_snap
  elif is_weighted:
    parse = parser.parse_weighted_csv
  else:
    parse = parser.parse_csv

  print('Parsing/Creating Graph...')
  start = time.time()    # Tracking time
  (nodes, out_degrees, in_degrees) = parse(file_name)
  end = time.time()
  print('Parse/Graph Set-up Time: ' + str(end - start) + ' seconds')

  # Sets up page rank structures
  pagerank.set_up(nodes, out_degrees, in_degrees)

  # PAGE RANKING
  print('Page Ranking...')
  start = time.time()
  num_iters = pagerank.page_rank(0)  # Stores # of page rank iterations
  end = time.time()

  # Statistics
  print('Page Rank Time: ' + str(end - start) + ' seconds')
  print('Page Rank Iterations: ' + str(num_iters))
Ejemplo n.º 3
0
def _run_pagerank_binary(executable, spec, nodes, out_degrees, in_degrees):
    """Run an external page rank binary on the graph and return its output.

    executable  -- path of the program to launch
    spec        -- (numIterations, numNodes, numEdges) for the input file
    nodes       -- iterable of node ids
    out_degrees -- mapping node -> value written as the node's out-degree
    in_degrees  -- mapping node -> sequence of node ids

    The child receives numNodes, numEdges and numIterations as arguments.
    Its stdin gets one line per entry of every in_degrees[node], followed by
    one "<node> <len(in_degrees[node])> <out_degrees[node]>" line per node
    (missing entries count as 0). Returns the child's stdout without its
    last character.
    """
    (numIterations, numNodes, numEdges) = spec
    p = subprocess.Popen(
        [executable, str(numNodes), str(numEdges), str(numIterations)],
        stdout=subprocess.PIPE,
        stdin=subprocess.PIPE,
    )

    lines = []
    for node in nodes:
        linked = in_degrees.get(node)
        if linked is not None:
            lines.extend("%d\n" % int(other) for other in linked)

    for node in nodes:
        linked = in_degrees.get(node)
        in_count = len(linked) if linked is not None else 0
        out_count = out_degrees.get(node)
        if out_count is None:
            out_count = 0
        lines.append("%d %d %d\n" % (int(node), int(in_count), int(out_count)))

    # Hand the whole input to communicate() instead of writing to p.stdin
    # directly: communicate() interleaves writing and reading, so the child
    # cannot block on a full stdout pipe while we are still writing. It also
    # closes stdin itself. (This file targets Python 2, where str is the byte
    # string the pipe expects.)
    output = p.communicate("".join(lines))[0]
    return output[:-1]


def main():
    """
    Command-line entry point of the Page Rank program.

    Ex:
      > python lab3.py <file> <phi|cuda|both> [-w]

    The parser is chosen from the file name: names ending in "csv" use the
    csv parser, names ending in "txt" use the SNAP parser, anything else is
    rejected. The file's base name is then looked up in the table returned
    by specs() and the graph is piped to ./pr_test (phi), ./pr_cuda (cuda)
    or both; each result is passed to printPageRankValues.

    The optional '-w' flag selects parser.parse_weighted_csv instead of
    parser.parse_csv for csv input. It may appear anywhere on the command
    line.

    Note: -w doesn't quite work at the moment. Please ignore it for now.
    """

    fileSpecs = specs()

    # Used for '-w' flag
    is_weighted = "-w" in sys.argv[1:]

    print("CSC 466: Lab 3 - PageRank & Link Analysis")

    # The flag is filtered out so it cannot be mistaken for the file name or
    # the version, and a short command line is reported instead of raising
    # IndexError.
    positional = [arg for arg in sys.argv[1:] if arg != "-w"]
    if len(positional) < 2:
        print("Usage: python lab3.py <file> <phi|cuda|both> [-w]")
        print("Invalid input - exiting")
        return
    file_name, version = positional[0], positional[1]

    if file_name.endswith("csv"):
        parse_menu = "1"
    elif file_name.endswith("txt"):
        parse_menu = "2"
    else:
        # Previously execution fell through and parsed the file as csv.
        print("Format not supported")
        print("Invalid input - exiting")
        return

    # PARSING
    # Note: The algorithm is the same, just parsing is different.
    if parse_menu == "2":
        parse = parser.parse_snap
    elif is_weighted:
        parse = parser.parse_weighted_csv
    else:
        parse = parser.parse_csv

    print("Parsing/Creating Graph...")
    start = time.time()  # Tracking time
    (nodes, out_degrees, in_degrees, names) = parse(file_name)
    end = time.time()
    print("Parse/Graph Set-up Time: " + str(end - start) + " seconds")

    # Keep only the base name. rfind() returns the int -1 when there is no
    # "/", in which case the slice starts at 0 and keeps the whole string, so
    # no explicit test is needed (the old test compared against the string
    # "-1" and was therefore always true).
    file_name = file_name[file_name.rfind("/") + 1 :]

    spec = fileSpecs[file_name]
    (numIterations, numNodes, numEdges) = spec
    print("%s %s %s" % (numIterations, numNodes, numEdges))

    useNames = parse_menu == "1" or file_name == "wiki-Vote.txt"

    # Call C Program
    if version in ("phi", "both"):
        output = _run_pagerank_binary(
            "./pr_test", spec, nodes, out_degrees, in_degrees
        )
        printPageRankValues(output, useNames, names)

    if version in ("cuda", "both"):
        output = _run_pagerank_binary(
            "./pr_cuda", spec, nodes, out_degrees, in_degrees
        )
        print(str(useNames))
        printPageRankValues(output, useNames, names)